D7net
Home
Console
Upload
information
Create File
Create Folder
About
Tools
:
/
opt
/
cpanel
/
perl5
/
536
/
site_lib
/
Perl
/
Tidy
/
Filename :
Formatter.pm
back
Copy
#################################################################### # # The Perl::Tidy::Formatter package adds indentation, whitespace, and # line breaks to the token stream # ##################################################################### # Index... # CODE SECTION 1: Preliminary code, global definitions and sub new # sub new # CODE SECTION 2: Some Basic Utilities # CODE SECTION 3: Check and process options # sub check_options # CODE SECTION 4: Receive lines from the tokenizer # sub write_line # CODE SECTION 5: Pre-process the entire file # sub finish_formatting # CODE SECTION 6: Process line-by-line # sub process_all_lines # CODE SECTION 7: Process lines of code # process_line_of_CODE # CODE SECTION 8: Utilities for setting breakpoints # sub set_forced_breakpoint # CODE SECTION 9: Process batches of code # sub grind_batch_of_CODE # CODE SECTION 10: Code to break long statements # sub break_long_lines # CODE SECTION 11: Code to break long lists # sub break_lists # CODE SECTION 12: Code for setting indentation # CODE SECTION 13: Preparing batch of lines for vertical alignment # sub convey_batch_to_vertical_aligner # CODE SECTION 14: Code for creating closing side comments # sub add_closing_side_comment # CODE SECTION 15: Summarize # sub wrapup ####################################################################### # CODE SECTION 1: Preliminary code and global definitions up to sub new ####################################################################### package Perl::Tidy::Formatter; use strict; use warnings; # DEVEL_MODE gets switched on during automated testing for extra checking use constant DEVEL_MODE => 0; use constant EMPTY_STRING => q{}; use constant SPACE => q{ }; { #<<< A non-indenting brace to contain all lexical variables use Carp; use English qw( -no_match_vars ); use List::Util qw( min max first ); # min, max first are in Perl 5.8 our $VERSION = '20230912'; # The Tokenizer will be loaded with the Formatter ##use Perl::Tidy::Tokenizer; # for is_keyword() sub AUTOLOAD { # Catch any undefined sub calls so that we are sure to get # some diagnostic information. This sub should never be called # except for a programming error. our $AUTOLOAD; return if ( $AUTOLOAD =~ /\bDESTROY$/ ); my ( $pkg, $fname, $lno ) = caller(); my $my_package = __PACKAGE__; print {*STDERR} <<EOM; ====================================================================== Error detected in package '$my_package', version $VERSION Received unexpected AUTOLOAD call for sub '$AUTOLOAD' Called from package: '$pkg' Called from File '$fname' at line '$lno' This error is probably due to a recent programming change ====================================================================== EOM exit 1; } ## end sub AUTOLOAD sub DESTROY { my $self = shift; $self->_decrement_count(); return; } sub Die { my ($msg) = @_; Perl::Tidy::Die($msg); croak "unexpected return from Perl::Tidy::Die"; } sub Warn { my ($msg) = @_; Perl::Tidy::Warn($msg); return; } sub Fault { my ($msg) = @_; # This routine is called for errors that really should not occur # except if there has been a bug introduced by a recent program change. # Please add comments at calls to Fault to explain why the call # should not occur, and where to look to fix it. my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); my $pkg = __PACKAGE__; my $input_stream_name = get_input_stream_name(); Die(<<EOM); ============================================================================== While operating on input stream with name: '$input_stream_name' A fault was detected at line $line0 of sub '$subroutine1' in file '$filename1' which was called from line $line1 of sub '$subroutine2' Message: '$msg' This is probably an error introduced by a recent programming change. $pkg reports VERSION='$VERSION'. ============================================================================== EOM # We shouldn't get here, but this return is to keep Perl-Critic from # complaining. return; } ## end sub Fault sub Fault_Warn { my ($msg) = @_; # This is the same as Fault except that it calls Warn instead of Die # and returns. my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0); my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1); my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2); my $input_stream_name = get_input_stream_name(); Warn(<<EOM); ============================================================================== While operating on input stream with name: '$input_stream_name' A fault was detected at line $line0 of sub '$subroutine1' in file '$filename1' which was called from line $line1 of sub '$subroutine2' Message: '$msg' This is probably an error introduced by a recent programming change. Perl::Tidy::Formatter.pm reports VERSION='$VERSION'. ============================================================================== EOM return; } ## end sub Fault_Warn sub Exit { my ($msg) = @_; Perl::Tidy::Exit($msg); croak "unexpected return from Perl::Tidy::Exit"; } # Global variables ... my ( #----------------------------------------------------------------- # Section 1: Global variables which are either always constant or # are constant after being configured by user-supplied # parameters. They remain constant as a file is being processed. # The INITIALIZER comment tells the sub responsible for initializing # each variable. Failure to initialize or re-initialize a global # variable can cause bugs which are hard to locate. #----------------------------------------------------------------- # INITIALIZER: sub check_options $rOpts, # short-cut option variables # INITIALIZER: sub initialize_global_option_vars $rOpts_add_newlines, $rOpts_add_whitespace, $rOpts_add_trailing_commas, $rOpts_blank_lines_after_opening_block, $rOpts_block_brace_tightness, $rOpts_block_brace_vertical_tightness, $rOpts_brace_follower_vertical_tightness, $rOpts_break_after_labels, $rOpts_break_at_old_attribute_breakpoints, $rOpts_break_at_old_comma_breakpoints, $rOpts_break_at_old_keyword_breakpoints, $rOpts_break_at_old_logical_breakpoints, $rOpts_break_at_old_semicolon_breakpoints, $rOpts_break_at_old_ternary_breakpoints, $rOpts_break_open_compact_parens, $rOpts_closing_side_comments, $rOpts_closing_side_comment_else_flag, $rOpts_closing_side_comment_maximum_text, $rOpts_comma_arrow_breakpoints, $rOpts_continuation_indentation, $rOpts_cuddled_paren_brace, $rOpts_delete_closing_side_comments, $rOpts_delete_old_whitespace, $rOpts_delete_side_comments, $rOpts_delete_trailing_commas, $rOpts_delete_weld_interfering_commas, $rOpts_extended_continuation_indentation, $rOpts_format_skipping, $rOpts_freeze_whitespace, $rOpts_function_paren_vertical_alignment, $rOpts_fuzzy_line_length, $rOpts_ignore_old_breakpoints, $rOpts_ignore_side_comment_lengths, $rOpts_ignore_perlcritic_comments, $rOpts_indent_closing_brace, $rOpts_indent_columns, $rOpts_indent_only, $rOpts_keep_interior_semicolons, $rOpts_line_up_parentheses, $rOpts_logical_padding, $rOpts_maximum_consecutive_blank_lines, $rOpts_maximum_fields_per_table, $rOpts_maximum_line_length, $rOpts_one_line_block_semicolons, $rOpts_opening_brace_always_on_right, $rOpts_outdent_keywords, $rOpts_outdent_labels, $rOpts_outdent_long_comments, $rOpts_outdent_long_quotes, $rOpts_outdent_static_block_comments, $rOpts_recombine, $rOpts_short_concatenation_item_length, $rOpts_space_prototype_paren, $rOpts_stack_closing_block_brace, $rOpts_static_block_comments, $rOpts_add_missing_else, $rOpts_warn_missing_else, $rOpts_tee_block_comments, $rOpts_tee_pod, $rOpts_tee_side_comments, $rOpts_variable_maximum_line_length, $rOpts_valign_code, $rOpts_valign_side_comments, $rOpts_valign_if_unless, $rOpts_whitespace_cycle, $rOpts_extended_block_tightness, $rOpts_extended_line_up_parentheses, # Static hashes # INITIALIZER: BEGIN block %is_assignment, %is_non_list_type, %is_if_unless_and_or_last_next_redo_return, %is_if_elsif_else_unless_while_until_for_foreach, %is_if_unless_while_until_for_foreach, %is_last_next_redo_return, %is_if_unless, %is_if_elsif, %is_if_unless_elsif, %is_if_unless_elsif_else, %is_elsif_else, %is_and_or, %is_chain_operator, %is_block_without_semicolon, %ok_to_add_semicolon_for_block_type, %is_opening_type, %is_closing_type, %is_opening_token, %is_closing_token, %is_ternary, %is_equal_or_fat_comma, %is_counted_type, %is_opening_sequence_token, %is_closing_sequence_token, %matching_token, %is_container_label_type, %is_die_confess_croak_warn, %is_my_our_local, %is_soft_keep_break_type, %is_indirect_object_taker, @all_operators, %is_do_follower, %is_anon_sub_brace_follower, %is_anon_sub_1_brace_follower, %is_other_brace_follower, # INITIALIZER: sub check_options $controlled_comma_style, %keep_break_before_type, %keep_break_after_type, %outdent_keyword, %keyword_paren_inner_tightness, %container_indentation_options, %tightness, %line_up_parentheses_control_hash, $line_up_parentheses_control_is_lxpl, # These can be modified by grep-alias-list # INITIALIZER: sub initialize_grep_and_friends %is_sort_map_grep, %is_sort_map_grep_eval, %is_sort_map_grep_eval_do, %is_block_with_ci, %is_keyword_returning_list, %block_type_map, # initialized in BEGIN, but may be changed %want_one_line_block, # may be changed in prepare_cuddled_block_types # INITIALIZER: sub prepare_cuddled_block_types $rcuddled_block_types, # INITIALIZER: sub initialize_whitespace_hashes %binary_ws_rules, %want_left_space, %want_right_space, # INITIALIZER: sub initialize_bond_strength_hashes %right_bond_strength, %left_bond_strength, # INITIALIZER: sub initialize_token_break_preferences %want_break_before, %break_before_container_types, # INITIALIZER: sub initialize_space_after_keyword %space_after_keyword, # INITIALIZER: sub initialize_extended_block_tightness_list %extended_block_tightness_list, # INITIALIZED BY initialize_global_option_vars %opening_vertical_tightness, %closing_vertical_tightness, %closing_token_indentation, $some_closing_token_indentation, %opening_token_right, %stack_opening_token, %stack_closing_token, # INITIALIZER: sub initialize_weld_nested_exclusion_rules %weld_nested_exclusion_rules, # INITIALIZER: sub initialize_weld_fat_comma_rules %weld_fat_comma_rules, # INITIALIZER: sub initialize_trailing_comma_rules %trailing_comma_rules, # regex patterns for text identification. # Most can be configured by user parameters. # Most are initialized in a sub make_**_pattern during configuration. # INITIALIZER: sub make_sub_matching_pattern $SUB_PATTERN, $ASUB_PATTERN, %matches_ASUB, # INITIALIZER: make_static_block_comment_pattern $static_block_comment_pattern, # INITIALIZER: sub make_static_side_comment_pattern $static_side_comment_pattern, # INITIALIZER: make_format_skipping_pattern $format_skipping_pattern_begin, $format_skipping_pattern_end, # INITIALIZER: sub make_non_indenting_brace_pattern $non_indenting_brace_pattern, # INITIALIZER: sub make_bl_pattern $bl_exclusion_pattern, # INITIALIZER: make_bl_pattern $bl_pattern, # INITIALIZER: sub make_bli_pattern $bli_exclusion_pattern, # INITIALIZER: sub make_bli_pattern $bli_pattern, # INITIALIZER: sub make_block_brace_vertical_tightness_pattern $block_brace_vertical_tightness_pattern, # INITIALIZER: sub make_blank_line_pattern $blank_lines_after_opening_block_pattern, $blank_lines_before_closing_block_pattern, # INITIALIZER: sub make_keyword_group_list_pattern $keyword_group_list_pattern, $keyword_group_list_comment_pattern, # INITIALIZER: sub make_closing_side_comment_prefix $closing_side_comment_prefix_pattern, # INITIALIZER: sub make_closing_side_comment_list_pattern $closing_side_comment_list_pattern, # Table to efficiently find indentation and max line length # from level. # INITIALIZER: sub initialize_line_length_vars @maximum_line_length_at_level, @maximum_text_length_at_level, $stress_level_alpha, $stress_level_beta, $high_stress_level, # Total number of sequence items in a weld, for quick checks # INITIALIZER: weld_containers $total_weld_count, #-------------------------------------------------------- # Section 2: Work arrays for the current batch of tokens. #-------------------------------------------------------- # These are re-initialized for each batch of code # INITIALIZER: sub initialize_batch_variables $max_index_to_go, @block_type_to_go, @type_sequence_to_go, @forced_breakpoint_to_go, @token_lengths_to_go, @summed_lengths_to_go, @levels_to_go, @leading_spaces_to_go, @reduced_spaces_to_go, @mate_index_to_go, @ci_levels_to_go, @nesting_depth_to_go, @nobreak_to_go, @old_breakpoint_to_go, @tokens_to_go, @K_to_go, @types_to_go, @inext_to_go, @parent_seqno_to_go, # forced breakpoint variables associated with each batch of code $forced_breakpoint_count, $forced_breakpoint_undo_count, $index_max_forced_break, ); BEGIN { # Index names for token variables. # Do not combine with other BEGIN blocks (c101). my $i = 0; use constant { _CI_LEVEL_ => $i++, _CUMULATIVE_LENGTH_ => $i++, _LINE_INDEX_ => $i++, _KNEXT_SEQ_ITEM_ => $i++, _LEVEL_ => $i++, _TOKEN_ => $i++, _TOKEN_LENGTH_ => $i++, _TYPE_ => $i++, _TYPE_SEQUENCE_ => $i++, # Number of token variables; must be last in list: _NVARS => $i++, }; } ## end BEGIN BEGIN { # Index names for $self variables. # Do not combine with other BEGIN blocks (c101). my $i = 0; use constant { _rlines_ => $i++, _rLL_ => $i++, _Klimit_ => $i++, _rdepth_of_opening_seqno_ => $i++, _rSS_ => $i++, _Iss_opening_ => $i++, _Iss_closing_ => $i++, _rblock_type_of_seqno_ => $i++, _ris_asub_block_ => $i++, _ris_sub_block_ => $i++, _K_opening_container_ => $i++, _K_closing_container_ => $i++, _K_opening_ternary_ => $i++, _K_closing_ternary_ => $i++, _K_first_seq_item_ => $i++, _rtype_count_by_seqno_ => $i++, _ris_function_call_paren_ => $i++, _rlec_count_by_seqno_ => $i++, _ris_broken_container_ => $i++, _ris_permanently_broken_ => $i++, _rblank_and_comment_count_ => $i++, _rhas_list_ => $i++, _rhas_broken_list_ => $i++, _rhas_broken_list_with_lec_ => $i++, _rfirst_comma_line_index_ => $i++, _rhas_code_block_ => $i++, _rhas_broken_code_block_ => $i++, _rhas_ternary_ => $i++, _ris_excluded_lp_container_ => $i++, _rlp_object_by_seqno_ => $i++, _rwant_reduced_ci_ => $i++, _rno_xci_by_seqno_ => $i++, _rbrace_left_ => $i++, _ris_bli_container_ => $i++, _rparent_of_seqno_ => $i++, _rchildren_of_seqno_ => $i++, _ris_list_by_seqno_ => $i++, _ris_cuddled_closing_brace_ => $i++, _rbreak_container_ => $i++, _rshort_nested_ => $i++, _length_function_ => $i++, _is_encoded_data_ => $i++, _fh_tee_ => $i++, _sink_object_ => $i++, _file_writer_object_ => $i++, _vertical_aligner_object_ => $i++, _logger_object_ => $i++, _radjusted_levels_ => $i++, _this_batch_ => $i++, _ris_special_identifier_token_ => $i++, _last_output_short_opening_token_ => $i++, _last_line_leading_type_ => $i++, _last_line_leading_level_ => $i++, _added_semicolon_count_ => $i++, _first_added_semicolon_at_ => $i++, _last_added_semicolon_at_ => $i++, _deleted_semicolon_count_ => $i++, _first_deleted_semicolon_at_ => $i++, _last_deleted_semicolon_at_ => $i++, _embedded_tab_count_ => $i++, _first_embedded_tab_at_ => $i++, _last_embedded_tab_at_ => $i++, _first_tabbing_disagreement_ => $i++, _last_tabbing_disagreement_ => $i++, _tabbing_disagreement_count_ => $i++, _in_tabbing_disagreement_ => $i++, _first_brace_tabbing_disagreement_ => $i++, _in_brace_tabbing_disagreement_ => $i++, _saw_VERSION_in_this_file_ => $i++, _saw_END_or_DATA_ => $i++, _rK_weld_left_ => $i++, _rK_weld_right_ => $i++, _rweld_len_right_at_K_ => $i++, _rspecial_side_comment_type_ => $i++, _rseqno_controlling_my_ci_ => $i++, _ris_seqno_controlling_ci_ => $i++, _save_logfile_ => $i++, _maximum_level_ => $i++, _maximum_level_at_line_ => $i++, _maximum_BLOCK_level_ => $i++, _maximum_BLOCK_level_at_line_ => $i++, _rKrange_code_without_comments_ => $i++, _rbreak_before_Kfirst_ => $i++, _rbreak_after_Klast_ => $i++, _converged_ => $i++, _rstarting_multiline_qw_seqno_by_K_ => $i++, _rending_multiline_qw_seqno_by_K_ => $i++, _rKrange_multiline_qw_by_seqno_ => $i++, _rmultiline_qw_has_extra_level_ => $i++, _rcollapsed_length_by_seqno_ => $i++, _rbreak_before_container_by_seqno_ => $i++, _roverride_cab3_ => $i++, _ris_assigned_structure_ => $i++, _ris_short_broken_eval_block_ => $i++, _ris_bare_trailing_comma_by_seqno_ => $i++, _rseqno_non_indenting_brace_by_ix_ => $i++, _rmax_vertical_tightness_ => $i++, _no_vertical_tightness_flags_ => $i++, _LAST_SELF_INDEX_ => $i - 1, }; } ## end BEGIN BEGIN { # Index names for batch variables. # Do not combine with other BEGIN blocks (c101). # These are stored in _this_batch_, which is a sub-array of $self. my $i = 0; use constant { _starting_in_quote_ => $i++, _ending_in_quote_ => $i++, _is_static_block_comment_ => $i++, _ri_first_ => $i++, _ri_last_ => $i++, _do_not_pad_ => $i++, _peak_batch_size_ => $i++, _batch_count_ => $i++, _rix_seqno_controlling_ci_ => $i++, _batch_CODE_type_ => $i++, _ri_starting_one_line_block_ => $i++, _runmatched_opening_indexes_ => $i++, _lp_object_count_this_batch_ => $i++, }; } ## end BEGIN BEGIN { # Sequence number assigned to the root of sequence tree. # The minimum of the actual sequences numbers is 4, so we can use 1 use constant SEQ_ROOT => 1; # Codes for insertion and deletion of blanks use constant DELETE => 0; use constant STABLE => 1; use constant INSERT => 2; # whitespace codes use constant WS_YES => 1; use constant WS_OPTIONAL => 0; use constant WS_NO => -1; # Token bond strengths. use constant NO_BREAK => 10_000; use constant VERY_STRONG => 100; use constant STRONG => 2.1; use constant NOMINAL => 1.1; use constant WEAK => 0.8; use constant VERY_WEAK => 0.55; # values for testing indexes in output array use constant UNDEFINED_INDEX => -1; # Maximum number of little messages; probably need not be changed. use constant MAX_NAG_MESSAGES => 6; # This is the decimal range of printable characters in ASCII. It is used to # make quick preliminary checks before resorting to using a regex. use constant ORD_PRINTABLE_MIN => 33; use constant ORD_PRINTABLE_MAX => 126; # Initialize constant hashes ... my @q; @q = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= ); @is_assignment{@q} = (1) x scalar(@q); # a hash needed by break_lists for efficiency: push @q, qw{ ; < > ~ f }; @is_non_list_type{@q} = (1) x scalar(@q); @q = qw(is if unless and or err last next redo return); @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q); # These block types may have text between the keyword and opening # curly. Note: 'else' does not, but must be included to allow trailing # if/elsif text to be appended. # patch for SWITCH/CASE: added 'case' and 'when' @q = qw(if elsif else unless while until for foreach case when catch); @is_if_elsif_else_unless_while_until_for_foreach{@q} = (1) x scalar(@q); @q = qw(if unless while until for foreach); @is_if_unless_while_until_for_foreach{@q} = (1) x scalar(@q); @q = qw(last next redo return); @is_last_next_redo_return{@q} = (1) x scalar(@q); # Map related block names into a common name to allow vertical alignment # used by sub make_alignment_patterns. Note: this is normally unchanged, # but it contains 'grep' and can be re-initialized in # sub initialize_grep_and_friends in a testing mode. %block_type_map = ( 'unless' => 'if', 'else' => 'if', 'elsif' => 'if', 'when' => 'if', 'default' => 'if', 'case' => 'if', 'sort' => 'map', 'grep' => 'map', ); @q = qw(if unless); @is_if_unless{@q} = (1) x scalar(@q); @q = qw(if elsif); @is_if_elsif{@q} = (1) x scalar(@q); @q = qw(if unless elsif); @is_if_unless_elsif{@q} = (1) x scalar(@q); @q = qw(if unless elsif else); @is_if_unless_elsif_else{@q} = (1) x scalar(@q); @q = qw(elsif else); @is_elsif_else{@q} = (1) x scalar(@q); @q = qw(and or err); @is_and_or{@q} = (1) x scalar(@q); # Identify certain operators which often occur in chains. # Note: the minus (-) causes a side effect of padding of the first line in # something like this (by sub set_logical_padding): # Checkbutton => 'Transmission checked', # -variable => \$TRANS # This usually improves appearance so it seems ok. @q = qw(&& || and or : ? . + - * /); @is_chain_operator{@q} = (1) x scalar(@q); # Operators that the user can request break before or after. # Note that some are keywords @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= . : ? && || and or err xor ); # We can remove semicolons after blocks preceded by these keywords @q = qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else unless while until for foreach given when default); @is_block_without_semicolon{@q} = (1) x scalar(@q); # We will allow semicolons to be added within these block types # as well as sub and package blocks. # NOTES: # 1. Note that these keywords are omitted: # switch case given when default sort map grep # 2. It is also ok to add for sub and package blocks and a labeled block # 3. But not okay for other perltidy types including: # { } ; G t # 4. Test files: blktype.t, blktype1.t, semicolon.t @q = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else unless do while until eval for foreach ); @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q); # 'L' is token for opening { at hash key @q = qw< L { ( [ >; @is_opening_type{@q} = (1) x scalar(@q); # 'R' is token for closing } at hash key @q = qw< R } ) ] >; @is_closing_type{@q} = (1) x scalar(@q); @q = qw< { ( [ >; @is_opening_token{@q} = (1) x scalar(@q); @q = qw< } ) ] >; @is_closing_token{@q} = (1) x scalar(@q); @q = qw( ? : ); @is_ternary{@q} = (1) x scalar(@q); @q = qw< { ( [ ? >; @is_opening_sequence_token{@q} = (1) x scalar(@q); @q = qw< } ) ] : >; @is_closing_sequence_token{@q} = (1) x scalar(@q); %matching_token = ( '{' => '}', '(' => ')', '[' => ']', '?' => ':', '}' => '{', ')' => '(', ']' => '[', ':' => '?', ); # a hash needed by sub break_lists for labeling containers @q = qw( k => && || ? : . ); @is_container_label_type{@q} = (1) x scalar(@q); @q = qw( die confess croak warn ); @is_die_confess_croak_warn{@q} = (1) x scalar(@q); @q = qw( my our local ); @is_my_our_local{@q} = (1) x scalar(@q); # Braces -bbht etc must follow these. Note: experimentation with # including a simple comma shows that it adds little and can lead # to poor formatting in complex lists. @q = qw( = => ); @is_equal_or_fat_comma{@q} = (1) x scalar(@q); @q = qw( => ; h f ); push @q, ','; @is_counted_type{@q} = (1) x scalar(@q); # Tokens where --keep-old-break-xxx flags make soft breaks instead # of hard breaks. See b1433 and b1436. # NOTE: $type is used as the hash key for now; if other container tokens # are added it might be necessary to use a token/type mixture. @q = qw# -> ? : && || + - / * #; @is_soft_keep_break_type{@q} = (1) x scalar(@q); # these functions allow an identifier in the indirect object slot @q = qw( print printf sort exec system say); @is_indirect_object_taker{@q} = (1) x scalar(@q); # Define here tokens which may follow the closing brace of a do statement # on the same line, as in: # } while ( $something); my @dof = qw(until while unless if ; : ); push @dof, ','; @is_do_follower{@dof} = (1) x scalar(@dof); # what can follow a multi-line anonymous sub definition closing curly: my @asf = qw# ; : => or and && || ~~ !~~ ) #; push @asf, ','; @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf); # what can follow a one-line anonymous sub closing curly: # one-line anonymous subs also have ']' here... # see tk3.t and PP.pm my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #; push @asf1, ','; @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1); # What can follow a closing curly of a block # which is not an if/elsif/else/do/sort/map/grep/eval/sub # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' my @obf = qw# ; : => or and && || ) #; push @obf, ','; @is_other_brace_follower{@obf} = (1) x scalar(@obf); } ## end BEGIN { ## begin closure to count instances # methods to count instances my $_count = 0; sub _increment_count { return ++$_count } sub _decrement_count { return --$_count } } ## end closure to count instances sub new { my ( $class, @args ) = @_; # we are given an object with a write_line() method to take lines my %defaults = ( sink_object => undef, diagnostics_object => undef, logger_object => undef, length_function => undef, is_encoded_data => EMPTY_STRING, fh_tee => undef, ); my %args = ( %defaults, @args ); my $length_function = $args{length_function}; my $is_encoded_data = $args{is_encoded_data}; my $fh_tee = $args{fh_tee}; my $logger_object = $args{logger_object}; my $diagnostics_object = $args{diagnostics_object}; # we create another object with a get_line() and peek_ahead() method my $sink_object = $args{sink_object}; my $file_writer_object = Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); # initialize closure variables... set_logger_object($logger_object); set_diagnostics_object($diagnostics_object); initialize_lp_vars(); initialize_csc_vars(); initialize_break_lists(); initialize_undo_ci(); initialize_process_line_of_CODE(); initialize_grind_batch_of_CODE(); initialize_get_final_indentation(); initialize_postponed_breakpoint(); initialize_batch_variables(); initialize_write_line(); my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new( rOpts => $rOpts, file_writer_object => $file_writer_object, logger_object => $logger_object, diagnostics_object => $diagnostics_object, ); write_logfile_entry("\nStarting tokenization pass...\n"); if ( $rOpts->{'entab-leading-whitespace'} ) { write_logfile_entry( "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" ); } elsif ( $rOpts->{'tabs'} ) { write_logfile_entry("Indentation will be with a tab character\n"); } else { write_logfile_entry( "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); } # Initialize the $self array reference. # To add an item, first add a constant index in the BEGIN block above. my $self = []; # Basic data structures... $self->[_rlines_] = []; # = ref to array of lines of the file # 'rLL' = reference to the continuous liner array of all tokens in a file. # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but # 'LL' stuck because it is easy to type. The 'rLL' array is updated # by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin # with '$K' by convention. $self->[_rLL_] = []; $self->[_Klimit_] = undef; # = maximum K index for rLL. # Indexes into the rLL list $self->[_K_opening_container_] = {}; $self->[_K_closing_container_] = {}; $self->[_K_opening_ternary_] = {}; $self->[_K_closing_ternary_] = {}; $self->[_K_first_seq_item_] = undef; # K of first token with a sequence # # 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence # numbers with + or - indicating opening or closing. This list represents # the entire container tree and is invariant under reformatting. It can be # used to quickly travel through the tree. Indexes in the rSS array begin # with '$I' by convention. The 'Iss' arrays give the indexes in this list # of opening and closing sequence numbers. $self->[_rSS_] = []; $self->[_Iss_opening_] = []; $self->[_Iss_closing_] = []; # Arrays to help traverse the tree $self->[_rdepth_of_opening_seqno_] = []; $self->[_rblock_type_of_seqno_] = {}; $self->[_ris_asub_block_] = {}; $self->[_ris_sub_block_] = {}; # Mostly list characteristics and processing flags $self->[_rtype_count_by_seqno_] = {}; $self->[_ris_function_call_paren_] = {}; $self->[_rlec_count_by_seqno_] = {}; $self->[_ris_broken_container_] = {}; $self->[_ris_permanently_broken_] = {}; $self->[_rblank_and_comment_count_] = {}; $self->[_rhas_list_] = {}; $self->[_rhas_broken_list_] = {}; $self->[_rhas_broken_list_with_lec_] = {}; $self->[_rfirst_comma_line_index_] = {}; $self->[_rhas_code_block_] = {}; $self->[_rhas_broken_code_block_] = {}; $self->[_rhas_ternary_] = {}; $self->[_ris_excluded_lp_container_] = {}; $self->[_rlp_object_by_seqno_] = {}; $self->[_rwant_reduced_ci_] = {}; $self->[_rno_xci_by_seqno_] = {}; $self->[_rbrace_left_] = {}; $self->[_ris_bli_container_] = {}; $self->[_rparent_of_seqno_] = {}; $self->[_rchildren_of_seqno_] = {}; $self->[_ris_list_by_seqno_] = {}; $self->[_ris_cuddled_closing_brace_] = {}; $self->[_rbreak_container_] = {}; # prevent one-line blocks $self->[_rshort_nested_] = {}; # blocks not forced open $self->[_length_function_] = $length_function; $self->[_is_encoded_data_] = $is_encoded_data; # Some objects... $self->[_fh_tee_] = $fh_tee; $self->[_sink_object_] = $sink_object; $self->[_file_writer_object_] = $file_writer_object; $self->[_vertical_aligner_object_] = $vertical_aligner_object; $self->[_logger_object_] = $logger_object; # Reference to the batch being processed $self->[_this_batch_] = []; # Memory of processed text... $self->[_ris_special_identifier_token_] = {}; $self->[_last_line_leading_level_] = 0; $self->[_last_line_leading_type_] = '#'; $self->[_last_output_short_opening_token_] = 0; $self->[_added_semicolon_count_] = 0; $self->[_first_added_semicolon_at_] = 0; $self->[_last_added_semicolon_at_] = 0; $self->[_deleted_semicolon_count_] = 0; $self->[_first_deleted_semicolon_at_] = 0; $self->[_last_deleted_semicolon_at_] = 0; $self->[_embedded_tab_count_] = 0; $self->[_first_embedded_tab_at_] = 0; $self->[_last_embedded_tab_at_] = 0; $self->[_first_tabbing_disagreement_] = 0; $self->[_last_tabbing_disagreement_] = 0; $self->[_tabbing_disagreement_count_] = 0; $self->[_in_tabbing_disagreement_] = 0; $self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'}; $self->[_saw_END_or_DATA_] = 0; $self->[_first_brace_tabbing_disagreement_] = undef; $self->[_in_brace_tabbing_disagreement_] = undef; # Hashes related to container welding... $self->[_radjusted_levels_] = []; # Weld data structures $self->[_rK_weld_left_] = {}; $self->[_rK_weld_right_] = {}; $self->[_rweld_len_right_at_K_] = {}; # -xci stuff $self->[_rseqno_controlling_my_ci_] = {}; $self->[_ris_seqno_controlling_ci_] = {}; $self->[_rspecial_side_comment_type_] = {}; $self->[_maximum_level_] = 0; $self->[_maximum_level_at_line_] = 0; $self->[_maximum_BLOCK_level_] = 0; $self->[_maximum_BLOCK_level_at_line_] = 0; $self->[_rKrange_code_without_comments_] = []; $self->[_rbreak_before_Kfirst_] = {}; $self->[_rbreak_after_Klast_] = {}; $self->[_converged_] = 0; # qw stuff $self->[_rstarting_multiline_qw_seqno_by_K_] = {}; $self->[_rending_multiline_qw_seqno_by_K_] = {}; $self->[_rKrange_multiline_qw_by_seqno_] = {}; $self->[_rmultiline_qw_has_extra_level_] = {}; $self->[_rcollapsed_length_by_seqno_] = {}; $self->[_rbreak_before_container_by_seqno_] = {}; $self->[_roverride_cab3_] = {}; $self->[_ris_assigned_structure_] = {}; $self->[_ris_short_broken_eval_block_] = {}; $self->[_ris_bare_trailing_comma_by_seqno_] = {}; $self->[_rseqno_non_indenting_brace_by_ix_] = {}; $self->[_rmax_vertical_tightness_] = {}; $self->[_no_vertical_tightness_flags_] = 0; # This flag will be updated later by a call to get_save_logfile() $self->[_save_logfile_] = defined($logger_object); # Be sure all variables in $self have been initialized above. To find the # correspondence of index numbers and array names, copy a list to a file # and use the unix 'nl' command to number lines 1.. if (DEVEL_MODE) { my @non_existant; foreach ( 0 .. _LAST_SELF_INDEX_ ) { if ( !exists( $self->[$_] ) ) { push @non_existant, $_; } } if (@non_existant) { Fault("These indexes in self not initialized: (@non_existant)\n"); } } bless $self, $class; # Safety check..this is not a class yet if ( _increment_count() > 1 ) { confess "Attempt to create more than 1 object in $class, which is not a true class yet\n"; } return $self; } ## end sub new ###################################### # CODE SECTION 2: Some Basic Utilities ###################################### sub check_rLL { # Verify that the rLL array has not been auto-vivified my ( $self, $msg ) = @_; my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; my $num = @{$rLL}; if ( ( defined($Klimit) && $Klimit != $num - 1 ) || ( !defined($Klimit) && $num > 0 ) ) { # This fault can occur if the array has been accessed for an index # greater than $Klimit, which is the last token index. Just accessing # the array above index $Klimit, not setting a value, can cause @rLL to # increase beyond $Klimit. If this occurs, the problem can be located # by making calls to this routine at different locations in # sub 'finish_formatting'. $Klimit = 'undef' if ( !defined($Klimit) ); $msg = EMPTY_STRING unless $msg; Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n"); } return; } ## end sub check_rLL sub check_keys { my ( $rtest, $rvalid, $msg, $exact_match ) = @_; # Check the keys of a hash: # $rtest = ref to hash to test # $rvalid = ref to hash with valid keys # $msg = a message to write in case of error # $exact_match defines the type of check: # = false: test hash must not have unknown key # = true: test hash must have exactly same keys as known hash my @unknown_keys = grep { !exists $rvalid->{$_} } keys %{$rtest}; my @missing_keys = grep { !exists $rtest->{$_} } keys %{$rvalid}; my $error = @unknown_keys; if ($exact_match) { $error ||= @missing_keys } if ($error) { local $LIST_SEPARATOR = ')('; my @expected_keys = sort keys %{$rvalid}; @unknown_keys = sort @unknown_keys; Fault(<<EOM); ------------------------------------------------------------------------ Program error detected checking hash keys Message is: '$msg' Expected keys: (@expected_keys) Unknown key(s): (@unknown_keys) Missing key(s): (@missing_keys) ------------------------------------------------------------------------ EOM } return; } ## end sub check_keys sub check_token_array { my $self = shift; # Check for errors in the array of tokens. This is only called # when the DEVEL_MODE flag is set, so this Fault will only occur # during code development. my $rLL = $self->[_rLL_]; foreach my $KK ( 0 .. @{$rLL} - 1 ) { my $nvars = @{ $rLL->[$KK] }; if ( $nvars != _NVARS ) { my $NVARS = _NVARS; my $type = $rLL->[$KK]->[_TYPE_]; $type = '*' unless defined($type); # The number of variables per token node is _NVARS and was set when # the array indexes were generated. So if the number of variables # is different we have done something wrong, like not store all of # them in sub 'write_line' when they were received from the # tokenizer. Fault( "number of vars for node $KK, type '$type', is $nvars but should be $NVARS" ); } foreach my $var ( _TOKEN_, _TYPE_ ) { if ( !defined( $rLL->[$KK]->[$var] ) ) { my $iline = $rLL->[$KK]->[_LINE_INDEX_]; # This is a simple check that each token has some basic # variables. In other words, that there are no holes in the # array of tokens. Sub 'write_line' pushes tokens into the # $rLL array, so this should guarantee no gaps. Fault("Undefined variable $var for K=$KK, line=$iline\n"); } } } return; } ## end sub check_token_array { ## begin closure check_line_hashes # This code checks that no auto-vivification occurs in the 'line' hash my %valid_line_hash; BEGIN { # These keys are defined for each line in the formatter # Each line must have exactly these quantities my @valid_line_keys = qw( _curly_brace_depth _ending_in_quote _guessed_indentation_level _line_number _line_text _line_type _paren_depth _quote_character _rK_range _square_bracket_depth _starting_in_quote _ended_in_blank_token _code_type _ci_level_0 _level_0 _nesting_blocks_0 _nesting_tokens_0 ); @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys); } ## end BEGIN sub check_line_hashes { my $self = shift; my $rlines = $self->[_rlines_]; foreach my $rline ( @{$rlines} ) { my $iline = $rline->{_line_number}; my $line_type = $rline->{_line_type}; check_keys( $rline, \%valid_line_hash, "Checkpoint: line number =$iline, line_type=$line_type", 1 ); } return; } ## end sub check_line_hashes } ## end closure check_line_hashes { ## begin closure for logger routines my $logger_object; # Called once per file to initialize the logger object sub set_logger_object { $logger_object = shift; return; } sub get_logger_object { return $logger_object; } sub get_input_stream_name { my $input_stream_name = EMPTY_STRING; if ($logger_object) { $input_stream_name = $logger_object->get_input_stream_name(); } return $input_stream_name; } ## end sub get_input_stream_name # interface to Perl::Tidy::Logger routines sub warning { my ( $msg, $msg_line_number ) = @_; if ($logger_object) { $logger_object->warning( $msg, $msg_line_number ); } return; } sub complain { my ( $msg, $msg_line_number ) = @_; if ($logger_object) { $logger_object->complain( $msg, $msg_line_number ); } return; } ## end sub complain sub write_logfile_entry { my @msg = @_; if ($logger_object) { $logger_object->write_logfile_entry(@msg); } return; } ## end sub write_logfile_entry sub get_saw_brace_error { if ($logger_object) { return $logger_object->get_saw_brace_error(); } return; } ## end sub get_saw_brace_error sub we_are_at_the_last_line { if ($logger_object) { $logger_object->we_are_at_the_last_line(); } return; } ## end sub we_are_at_the_last_line } ## end closure for logger routines { ## begin closure for diagnostics routines my $diagnostics_object; # Called once per file to initialize the diagnostics object sub set_diagnostics_object { $diagnostics_object = shift; return; } # Available for debugging but not currently used: sub write_diagnostics { my ( $msg, $line_number ) = @_; if ($diagnostics_object) { $diagnostics_object->write_diagnostics( $msg, $line_number ); } return; } ## end sub write_diagnostics } ## end closure for diagnostics routines sub get_convergence_check { my ($self) = @_; return $self->[_converged_]; } sub get_output_line_number { my ($self) = @_; my $vao = $self->[_vertical_aligner_object_]; return $vao->get_output_line_number(); } sub want_blank_line { my $self = shift; $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->want_blank_line(); return; } ## end sub want_blank_line sub write_unindented_line { my ( $self, $line ) = @_; $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_line($line); return; } ## end sub write_unindented_line sub consecutive_nonblank_lines { my ($self) = @_; my $file_writer_object = $self->[_file_writer_object_]; my $vao = $self->[_vertical_aligner_object_]; return $file_writer_object->get_consecutive_nonblank_lines() + $vao->get_cached_line_count(); } ## end sub consecutive_nonblank_lines sub split_words { # given a string containing words separated by whitespace, # return the list of words my ($str) = @_; return unless $str; $str =~ s/\s+$//; $str =~ s/^\s+//; return split( /\s+/, $str ); } ## end sub split_words ########################################### # CODE SECTION 3: Check and process options ########################################### sub check_options { # This routine is called to check the user-supplied run parameters # and to configure the control hashes to them. $rOpts = shift; $controlled_comma_style = 0; initialize_whitespace_hashes(); initialize_bond_strength_hashes(); # This function must be called early to get hashes with grep initialized initialize_grep_and_friends(); # Make needed regex patterns for matching text. # NOTE: sub_matching_patterns must be made first because later patterns use # them; see RT #133130. make_sub_matching_pattern(); # must be first pattern made make_static_block_comment_pattern(); make_static_side_comment_pattern(); make_closing_side_comment_prefix(); make_closing_side_comment_list_pattern(); $format_skipping_pattern_begin = make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); $format_skipping_pattern_end = make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); make_non_indenting_brace_pattern(); # If closing side comments ARE selected, then we can safely # delete old closing side comments unless closing side comment # warnings are requested. This is a good idea because it will # eliminate any old csc's which fall below the line count threshold. # We cannot do this if warnings are turned on, though, because we # might delete some text which has been added. So that must # be handled when comments are created. And we cannot do this # with -io because -csc will be skipped altogether. if ( $rOpts->{'closing-side-comments'} ) { if ( !$rOpts->{'closing-side-comment-warnings'} && !$rOpts->{'indent-only'} ) { $rOpts->{'delete-closing-side-comments'} = 1; } } # If closing side comments ARE NOT selected, but warnings ARE # selected and we ARE DELETING csc's, then we will pretend to be # adding with a huge interval. This will force the comments to be # generated for comparison with the old comments, but not added. elsif ( $rOpts->{'closing-side-comment-warnings'} ) { if ( $rOpts->{'delete-closing-side-comments'} ) { $rOpts->{'delete-closing-side-comments'} = 0; $rOpts->{'closing-side-comments'} = 1; $rOpts->{'closing-side-comment-interval'} = 100_000_000; } } else { ## ok - no -csc issues } my $comment = $rOpts->{'add-missing-else-comment'}; if ( !$comment ) { $comment = "##FIXME - added with perltidy -ame"; } else { $comment = substr( $comment, 0, 60 ); $comment =~ s/^\s+//; $comment =~ s/\s+$//; $comment =~ s/\n/ /g; if ( substr( $comment, 0, 1 ) ne '#' ) { $comment = '#' . $comment; } } $rOpts->{'add-missing-else-comment'} = $comment; make_bli_pattern(); make_bl_pattern(); make_block_brace_vertical_tightness_pattern(); make_blank_line_pattern(); make_keyword_group_list_pattern(); prepare_cuddled_block_types(); if ( $rOpts->{'dump-cuddled-block-list'} ) { dump_cuddled_block_list(*STDOUT); Exit(0); } # -xlp implies -lp if ( $rOpts->{'extended-line-up-parentheses'} ) { $rOpts->{'line-up-parentheses'} ||= 1; } if ( $rOpts->{'line-up-parentheses'} ) { if ( $rOpts->{'indent-only'} || !$rOpts->{'add-newlines'} || !$rOpts->{'delete-old-newlines'} ) { Warn(<<EOM); ----------------------------------------------------------------------- Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp The -lp indentation logic requires that perltidy be able to coordinate arbitrarily large numbers of line breakpoints. This isn't possible with these flags. ----------------------------------------------------------------------- EOM $rOpts->{'line-up-parentheses'} = 0; $rOpts->{'extended-line-up-parentheses'} = 0; } if ( $rOpts->{'whitespace-cycle'} ) { Warn(<<EOM); Conflict: -wc cannot currently be used with the -lp option; ignoring -wc EOM $rOpts->{'whitespace-cycle'} = 0; } } # At present, tabs are not compatible with the line-up-parentheses style # (it would be possible to entab the total leading whitespace # just prior to writing the line, if desired). if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { Warn(<<EOM); Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et. EOM $rOpts->{'tabs'} = 0; } # Likewise, tabs are not compatible with outdenting.. if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { Warn(<<EOM); Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et. EOM $rOpts->{'tabs'} = 0; } if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { Warn(<<EOM); Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et. EOM $rOpts->{'tabs'} = 0; } if ( !$rOpts->{'space-for-semicolon'} ) { $want_left_space{'f'} = -1; } if ( $rOpts->{'space-terminal-semicolon'} ) { $want_left_space{';'} = 1; } # We should put an upper bound on any -sil=n value. Otherwise enormous # files could be created by mistake. for ( $rOpts->{'starting-indentation-level'} ) { if ( $_ && $_ > 100 ) { Warn(<<EOM); The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0; EOM $_ = 0; } } # Require -msp > 0 to avoid future parsing problems (issue c147) for ( $rOpts->{'minimum-space-to-comment'} ) { if ( !$_ || $_ <= 0 ) { $_ = 1 } } # implement outdenting preferences for keywords %outdent_keyword = (); my @okw = split_words( $rOpts->{'outdent-keyword-list'} ); if ( !@okw ) { @okw = qw(next last redo goto return); # defaults } # FUTURE: if not a keyword, assume that it is an identifier foreach (@okw) { if ( Perl::Tidy::Tokenizer::is_keyword($_) ) { $outdent_keyword{$_} = 1; } else { Warn("ignoring '$_' in -okwl list; not a perl keyword"); } } # setup hash for -kpit option %keyword_paren_inner_tightness = (); my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'}; if ( defined($kpit_value) && $kpit_value != 1 ) { my @kpit = split_words( $rOpts->{'keyword-paren-inner-tightness-list'} ); if ( !@kpit ) { @kpit = qw(if elsif unless while until for foreach); # defaults } # we will allow keywords and user-defined identifiers foreach (@kpit) { $keyword_paren_inner_tightness{$_} = $kpit_value; } } # implement user whitespace preferences if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) { @want_left_space{@q} = (1) x scalar(@q); } if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) { @want_right_space{@q} = (1) x scalar(@q); } if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) { @want_left_space{@q} = (-1) x scalar(@q); } if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) { @want_right_space{@q} = (-1) x scalar(@q); } if ( $rOpts->{'dump-want-left-space'} ) { dump_want_left_space(*STDOUT); Exit(0); } if ( $rOpts->{'dump-want-right-space'} ) { dump_want_right_space(*STDOUT); Exit(0); } initialize_space_after_keyword(); initialize_extended_block_tightness_list(); initialize_token_break_preferences(); #-------------------------------------------------------------- # The combination -lp -iob -vmll -bbx=2 can be unstable (b1266) #-------------------------------------------------------------- # The -vmll and -lp parameters do not really work well together. # To avoid instabilities, we will change any -bbx=2 to -bbx=1 (stable). # NOTE: we could make this more precise by looking at any exclusion # flags for -lp, and allowing -bbx=2 for excluded types. if ( $rOpts->{'variable-maximum-line-length'} && $rOpts->{'ignore-old-breakpoints'} && $rOpts->{'line-up-parentheses'} ) { my @changed; foreach my $key ( keys %break_before_container_types ) { if ( $break_before_container_types{$key} == 2 ) { $break_before_container_types{$key} = 1; push @changed, $key; } } if (@changed) { # we could write a warning here } } #----------------------------------------------------------- # The combination -lp -vmll can be unstable if -ci<2 (b1267) #----------------------------------------------------------- # The -vmll and -lp parameters do not really work well together. # This is a very crude fix for an unusual parameter combination. if ( $rOpts->{'variable-maximum-line-length'} && $rOpts->{'line-up-parentheses'} && $rOpts->{'continuation-indentation'} < 2 ) { $rOpts->{'continuation-indentation'} = 2; ##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n"); } #----------------------------------------------------------- # The combination -lp -vmll -atc -dtc can be unstable #----------------------------------------------------------- # This fixes b1386 b1387 b1388 which had -wtc='b' # Updated to to include any -wtc to fix b1426 if ( $rOpts->{'variable-maximum-line-length'} && $rOpts->{'line-up-parentheses'} && $rOpts->{'add-trailing-commas'} && $rOpts->{'delete-trailing-commas'} && $rOpts->{'want-trailing-commas'} ) { $rOpts->{'delete-trailing-commas'} = 0; ## Issuing a warning message causes trouble with test cases, and this combo is ## so rare that it is unlikely to not occur in practice. So skip warning. ## Warn( ##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n" ## ); } %container_indentation_options = (); foreach my $pair ( [ 'break-before-hash-brace-and-indent', '{' ], [ 'break-before-square-bracket-and-indent', '[' ], [ 'break-before-paren-and-indent', '(' ], ) { my ( $key, $tok ) = @{$pair}; my $opt = $rOpts->{$key}; if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} ) { # (1) -lp is not compatible with opt=2, silently set to opt=0 # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster # (3) set opt=0 if -i < -ci (can be unstable, case b1355) if ( $opt == 2 ) { if ( $rOpts->{'line-up-parentheses'} || ( $rOpts->{'indent-columns'} <= $rOpts->{'continuation-indentation'} ) ) { $opt = 0; } } $container_indentation_options{$tok} = $opt; } } $right_bond_strength{'{'} = WEAK; $left_bond_strength{'{'} = VERY_STRONG; # make -l=0 equal to -l=infinite if ( !$rOpts->{'maximum-line-length'} ) { $rOpts->{'maximum-line-length'} = 1_000_000; } # make -lbl=0 equal to -lbl=infinite if ( !$rOpts->{'long-block-line-count'} ) { $rOpts->{'long-block-line-count'} = 1_000_000; } # hashes used to simplify setting whitespace %tightness = ( '{' => $rOpts->{'brace-tightness'}, '}' => $rOpts->{'brace-tightness'}, '(' => $rOpts->{'paren-tightness'}, ')' => $rOpts->{'paren-tightness'}, '[' => $rOpts->{'square-bracket-tightness'}, ']' => $rOpts->{'square-bracket-tightness'}, ); if ( $rOpts->{'ignore-old-breakpoints'} ) { my @conflicts; if ( $rOpts->{'break-at-old-method-breakpoints'} ) { $rOpts->{'break-at-old-method-breakpoints'} = 0; push @conflicts, '--break-at-old-method-breakpoints (-bom)'; } if ( $rOpts->{'break-at-old-comma-breakpoints'} ) { $rOpts->{'break-at-old-comma-breakpoints'} = 0; push @conflicts, '--break-at-old-comma-breakpoints (-boc)'; } if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) { $rOpts->{'break-at-old-semicolon-breakpoints'} = 0; push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)'; } if ( $rOpts->{'keep-old-breakpoints-before'} ) { $rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING; push @conflicts, '--keep-old-breakpoints-before (-kbb)'; } if ( $rOpts->{'keep-old-breakpoints-after'} ) { $rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING; push @conflicts, '--keep-old-breakpoints-after (-kba)'; } if (@conflicts) { my $msg = join( "\n ", " Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:", @conflicts ) . "\n"; Warn($msg); } # Note: These additional parameters are made inactive by -iob. # They are silently turned off here because they are on by default. # We would generate unexpected warnings if we issued a warning. $rOpts->{'break-at-old-keyword-breakpoints'} = 0; $rOpts->{'break-at-old-logical-breakpoints'} = 0; $rOpts->{'break-at-old-ternary-breakpoints'} = 0; $rOpts->{'break-at-old-attribute-breakpoints'} = 0; } %keep_break_before_type = (); initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'}, 'kbb', \%keep_break_before_type ); %keep_break_after_type = (); initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'}, 'kba', \%keep_break_after_type ); # Modify %keep_break_before and %keep_break_after to avoid conflicts # with %want_break_before; fixes b1436. # This became necessary after breaks for some tokens were converted # from hard to soft (see b1433). # We could do this for all tokens, but to minimize changes to existing # code we currently only do this for the soft break tokens. foreach my $key ( keys %keep_break_before_type ) { if ( defined( $want_break_before{$key} ) && !$want_break_before{$key} && $is_soft_keep_break_type{$key} ) { $keep_break_after_type{$key} = $keep_break_before_type{$key}; delete $keep_break_before_type{$key}; } } foreach my $key ( keys %keep_break_after_type ) { if ( defined( $want_break_before{$key} ) && $want_break_before{$key} && $is_soft_keep_break_type{$key} ) { $keep_break_before_type{$key} = $keep_break_after_type{$key}; delete $keep_break_after_type{$key}; } } $controlled_comma_style ||= $keep_break_before_type{','}; $controlled_comma_style ||= $keep_break_after_type{','}; initialize_global_option_vars(); initialize_line_length_vars(); # after 'initialize_global_option_vars' initialize_trailing_comma_rules(); # after 'initialize_line_length_vars' initialize_weld_nested_exclusion_rules(); initialize_weld_fat_comma_rules(); %line_up_parentheses_control_hash = (); $line_up_parentheses_control_is_lxpl = 1; my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'}; my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'}; if ( $lpxl && $lpil ) { Warn( <<EOM ); You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored EOM } if ($lpxl) { $line_up_parentheses_control_is_lxpl = 1; initialize_line_up_parentheses_control_hash( $rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' ); } elsif ($lpil) { $line_up_parentheses_control_is_lxpl = 0; initialize_line_up_parentheses_control_hash( $rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' ); } else { ## ok - neither -lpxl nor -lpil } return; } ## end sub check_options use constant ALIGN_GREP_ALIASES => 0; sub initialize_grep_and_friends { # Initialize or re-initialize hashes with 'grep' and grep aliases. This # must be done after each set of options because new grep aliases may be # used. # re-initialize the hashes ... this is critical! %is_sort_map_grep = (); my @q = qw(sort map grep); @is_sort_map_grep{@q} = (1) x scalar(@q); my $olbxl = $rOpts->{'one-line-block-exclusion-list'}; my %is_olb_exclusion_word; if ( defined($olbxl) ) { my @list = split_words($olbxl); if (@list) { @is_olb_exclusion_word{@list} = (1) x scalar(@list); } } # Make the list of block types which may be re-formed into one line. # They will be modified with the grep-alias-list below and # by sub 'prepare_cuddled_block_types'. # Note that it is essential to always re-initialize the hash here: %want_one_line_block = (); if ( !$is_olb_exclusion_word{'*'} ) { foreach (qw(sort map grep eval)) { if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 } } } # Note that any 'grep-alias-list' string has been preprocessed to be a # trimmed, space-separated list. my $str = $rOpts->{'grep-alias-list'}; my @grep_aliases = split /\s+/, $str; if (@grep_aliases) { @{is_sort_map_grep}{@grep_aliases} = (1) x scalar(@grep_aliases); if ( $want_one_line_block{'grep'} ) { @{want_one_line_block}{@grep_aliases} = (1) x scalar(@grep_aliases); } } ##@q = qw(sort map grep eval); %is_sort_map_grep_eval = %is_sort_map_grep; $is_sort_map_grep_eval{'eval'} = 1; ##@q = qw(sort map grep eval do); %is_sort_map_grep_eval_do = %is_sort_map_grep_eval; $is_sort_map_grep_eval_do{'do'} = 1; # These block types can take ci. This is used by the -xci option. # Note that the 'sub' in this list is an anonymous sub. To be more correct # we could remove sub and use ASUB pattern to also handle a # prototype/signature. But that would slow things down and would probably # never be useful. ##@q = qw( do sub eval sort map grep ); %is_block_with_ci = %is_sort_map_grep_eval_do; $is_block_with_ci{'sub'} = 1; %is_keyword_returning_list = (); @q = qw( grep keys map reverse sort split ); push @q, @grep_aliases; @is_keyword_returning_list{@q} = (1) x scalar(@q); # This code enables vertical alignment of grep aliases for testing. It has # not been found to be beneficial, so it is off by default. But it is # useful for precise testing of the grep alias coding. if (ALIGN_GREP_ALIASES) { %block_type_map = ( 'unless' => 'if', 'else' => 'if', 'elsif' => 'if', 'when' => 'if', 'default' => 'if', 'case' => 'if', 'sort' => 'map', 'grep' => 'map', ); foreach (@q) { $block_type_map{$_} = 'map' unless ( $_ eq 'map' ); } } return; } ## end sub initialize_grep_and_friends sub initialize_weld_nested_exclusion_rules { %weld_nested_exclusion_rules = (); my $opt_name = 'weld-nested-exclusion-list'; my $str = $rOpts->{$opt_name}; return unless ($str); $str =~ s/^\s+//; $str =~ s/\s+$//; return unless ($str); # There are four container tokens. my %token_keys = ( '(' => '(', '[' => '[', '{' => '{', 'q' => 'q', ); # We are parsing an exclusion list for nested welds. The list is a string # with spaces separating any number of items. Each item consists of three # pieces of information: # <optional position> <optional type> <type of container> # < ^ or . > < k or K > < ( [ { > # The last character is the required container type and must be one of: # ( = paren # [ = square bracket # { = brace # An optional leading position indicator: # ^ means the leading token position in the weld # . means a secondary token position in the weld # no position indicator means all positions match # An optional alphanumeric character between the position and container # token selects to which the rule applies: # k = any keyword # K = any non-keyword # f = function call # F = not a function call # w = function or keyword # W = not a function or keyword # no letter means any preceding type matches # Examples: # ^( - the weld must not start with a paren # .( - the second and later tokens may not be parens # ( - no parens in weld # ^K( - exclude a leading paren not preceded by a keyword # .k( - exclude a secondary paren preceded by a keyword # [ { - exclude all brackets and braces my @items = split /\s+/, $str; my $msg1; my $msg2; foreach my $item (@items) { my $item_save = $item; my $tok = chop($item); my $key = $token_keys{$tok}; if ( !defined($key) ) { $msg1 .= " '$item_save'"; next; } if ( !defined( $weld_nested_exclusion_rules{$key} ) ) { $weld_nested_exclusion_rules{$key} = []; } my $rflags = $weld_nested_exclusion_rules{$key}; # A 'q' means do not weld quotes if ( $tok eq 'q' ) { $rflags->[0] = '*'; $rflags->[1] = '*'; next; } my $pos = '*'; my $select = '*'; if ($item) { if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) { $pos = $1 if ($1); $select = $2 if ($2); } else { $msg1 .= " '$item_save'"; next; } } my $err; if ( $pos eq '^' || $pos eq '*' ) { if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) { $err = 1; } $rflags->[0] = $select; } if ( $pos eq '.' || $pos eq '*' ) { if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) { $err = 1; } $rflags->[1] = $select; } if ($err) { $msg2 .= " '$item_save'"; } } if ($msg1) { Warn(<<EOM); Unexpecting symbol(s) encountered in --$opt_name will be ignored: $msg1 EOM } if ($msg2) { Warn(<<EOM); Multiple specifications were encountered in the --weld-nested-exclusion-list for: $msg2 Only the last will be used. EOM } return; } ## end sub initialize_weld_nested_exclusion_rules sub initialize_weld_fat_comma_rules { # Initialize a hash controlling which opening token types can be # welded around a fat comma %weld_fat_comma_rules = (); # The -wfc flag turns on welding of '=>' after an opening paren if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 } # This could be generalized in the future by introducing a parameter # -weld-fat-comma-after=str (-wfca=str), where str contains any of: # * { [ ( # to indicate which opening parens may weld to a subsequent '=>' # The flag -wfc would then be equivalent to -wfca='(' # This has not been done because it is not yet clear how useful # this generalization would be. return; } ## end sub initialize_weld_fat_comma_rules sub initialize_line_up_parentheses_control_hash { my ( $str, $opt_name ) = @_; return unless ($str); $str =~ s/^\s+//; $str =~ s/\s+$//; return unless ($str); # The format is space separated items, where each item must consist of a # string with a token type preceded by an optional text token and followed # by an integer: # For example: # W(1 # = (flag1)(key)(flag2), where # flag1 = 'W' # key = '(' # flag2 = '1' my @items = split /\s+/, $str; my $msg1; my $msg2; foreach my $item (@items) { my $item_save = $item; my ( $flag1, $key, $flag2 ); if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) { $flag1 = $1 if $1; $key = $2 if $2; $flag2 = $3 if $3; } else { $msg1 .= " '$item_save'"; next; } if ( !defined($key) ) { $msg1 .= " '$item_save'"; next; } # Check for valid flag1 if ( !defined($flag1) ) { $flag1 = '*' } if ( $flag1 !~ /^[kKfFwW\*]$/ ) { $msg1 .= " '$item_save'"; next; } # Check for valid flag2 # 0 or blank: ignore container contents # 1 all containers with sublists match # 2 all containers with sublists, code blocks or ternary operators match # ... this could be extended in the future if ( !defined($flag2) ) { $flag2 = 0 } if ( $flag2 !~ /^[012]$/ ) { $msg1 .= " '$item_save'"; next; } if ( !defined( $line_up_parentheses_control_hash{$key} ) ) { $line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ]; next; } # check for multiple conflicting specifications my $rflags = $line_up_parentheses_control_hash{$key}; my $err; if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) { $err = 1; $rflags->[0] = $flag1; } if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) { $err = 1; $rflags->[1] = $flag2; } $msg2 .= " '$item_save'" if ($err); next; } if ($msg1) { Warn(<<EOM); Unexpecting symbol(s) encountered in --$opt_name will be ignored: $msg1 EOM } if ($msg2) { Warn(<<EOM); Multiple specifications were encountered in the $opt_name at: $msg2 Only the last will be used. EOM } # Speedup: we can turn off -lp if it is not actually used if ($line_up_parentheses_control_is_lxpl) { my $all_off = 1; foreach my $key (qw# ( { [ #) { my $rflags = $line_up_parentheses_control_hash{$key}; if ( defined($rflags) ) { my ( $flag1, $flag2 ) = @{$rflags}; if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last } if ($flag2) { $all_off = 0; last } } } if ($all_off) { $rOpts->{'line-up-parentheses'} = EMPTY_STRING; } } return; } ## end sub initialize_line_up_parentheses_control_hash sub initialize_space_after_keyword { # default keywords for which space is introduced before an opening paren # (at present, including them messes up vertical alignment) my @sak = qw(my local our and or xor err eq ne if else elsif until unless while for foreach return switch case given when catch); %space_after_keyword = map { $_ => 1 } @sak; # first remove any or all of these if desired if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) { # -nsak='*' selects all the above keywords if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) } @space_after_keyword{@q} = (0) x scalar(@q); } # then allow user to add to these defaults if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) { @space_after_keyword{@q} = (1) x scalar(@q); } return; } ## end sub initialize_space_after_keyword sub initialize_extended_block_tightness_list { # Setup the control hash for --extended-block-tightness # keywords taking indirect objects: my @k_list = keys %is_indirect_object_taker; # type symbols which may precede an opening block brace my @t_list = qw($ @ % & *); push @t_list, '$#'; my @all = ( @k_list, @t_list ); # We will build the selection in %hash # By default the option is 'on' for keywords only (-xbtl='k') my %hash; @hash{@k_list} = (1) x scalar(@k_list); @hash{@t_list} = (0) x scalar(@t_list); # This can be overridden with -xbtl="..." my $long_name = 'extended-block-tightness-list'; if ( $rOpts->{$long_name} ) { my @words = split_words( $rOpts->{$long_name} ); my @unknown; # Turn everything off @hash{@all} = (0) x scalar(@all); # Then turn on selections foreach my $word (@words) { # 'print' etc turns on a specific word or symbol if ( defined( $hash{$word} ) ) { $hash{$word} = 1; } # 'k' turns on all keywords elsif ( $word eq 'k' ) { @hash{@k_list} = (1) x scalar(@k_list); } # 't' turns on all symbols elsif ( $word eq 't' ) { @hash{@t_list} = (1) x scalar(@t_list); } # 'kt' same as 'k' and 't' for convenience elsif ( $word eq 'kt' ) { @hash{@all} = (1) x scalar(@all); } # Anything else is an error else { push @unknown, $word } } if (@unknown) { my $num = @unknown; local $LIST_SEPARATOR = SPACE; Warn(<<EOM); $num unrecognized keyword(s) were input with --$long_name : @unknown EOM } } # Transfer the result to the global hash %extended_block_tightness_list = %hash; return; } ## end sub initialize_extended_block_tightness_list sub initialize_token_break_preferences { # implement user break preferences my $break_after = sub { my @toks = @_; foreach my $tok (@toks) { if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: if ( $tok eq ',' ) { $controlled_comma_style = 1 } my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = ( $lbs, $rbs ); } } return; }; my $break_before = sub { my @toks = @_; foreach my $tok (@toks) { if ( $tok eq ',' ) { $controlled_comma_style = 1 } my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = ( $lbs, $rbs ); } } return; }; $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); $break_before->(@all_operators) if ( $rOpts->{'break-before-all-operators'} ); $break_after->( split_words( $rOpts->{'want-break-after'} ) ); $break_before->( split_words( $rOpts->{'want-break-before'} ) ); # make note if breaks are before certain key types %want_break_before = (); foreach my $tok ( @all_operators, ',' ) { $want_break_before{$tok} = $left_bond_strength{$tok} < $right_bond_strength{$tok}; } # Coordinate ?/: breaks, which must be similar # The small strength 0.01 which is added is 1% of the strength of one # indentation level and seems to work okay. if ( !$want_break_before{':'} ) { $want_break_before{'?'} = $want_break_before{':'}; $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; $left_bond_strength{'?'} = NO_BREAK; } # Only make a hash entry for the next parameters if values are defined. # That allows a quick check to be made later. %break_before_container_types = (); for ( $rOpts->{'break-before-hash-brace'} ) { $break_before_container_types{'{'} = $_ if $_ && $_ > 0; } for ( $rOpts->{'break-before-square-bracket'} ) { $break_before_container_types{'['} = $_ if $_ && $_ > 0; } for ( $rOpts->{'break-before-paren'} ) { $break_before_container_types{'('} = $_ if $_ && $_ > 0; } return; } ## end sub initialize_token_break_preferences use constant DEBUG_KB => 0; sub initialize_keep_old_breakpoints { my ( $str, $short_name, $rkeep_break_hash ) = @_; return unless $str; my %flags = (); my @list = split_words($str); if ( DEBUG_KB && @list ) { local $LIST_SEPARATOR = SPACE; print <<EOM; DEBUG_KB entering for '$short_name' with str=$str\n"; list is: @list; EOM } # Ignore kbb='(' and '[' and '{': can cause unstable math formatting # (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}' # Also always ignore ? and : (b1440 and b1433-b1439) if ( $short_name eq 'kbb' ) { @list = grep { !m/[\(\[\{\?\:]/ } @list; } elsif ( $short_name eq 'kba' ) { @list = grep { !m/[\)\]\}\?\:]/ } @list; } else { Fault(<<EOM); Bad call arg - received short name '$short_name' but expecting 'kbb' or 'kba' EOM } # pull out any any leading container code, like f( or *{ # For example: 'f(' becomes flags hash entry '(' => 'f' foreach my $item (@list) { if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) { $item = $2; $flags{$2} = $1; } } my @unknown_types; foreach my $type (@list) { if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) { push @unknown_types, $type; } } if (@unknown_types) { my $num = @unknown_types; local $LIST_SEPARATOR = SPACE; Warn(<<EOM); $num unrecognized token types were input with --$short_name : @unknown_types EOM } @{$rkeep_break_hash}{@list} = (1) x scalar(@list); foreach my $key ( keys %flags ) { my $flag = $flags{$key}; if ( length($flag) != 1 ) { Warn(<<EOM); Multiple entries given for '$key' in '$short_name' EOM } elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) { Warn(<<EOM); Unknown flag '$flag' given for '$key' in '$short_name' EOM } elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) { Warn(<<EOM); Unknown flag '$flag' given for '$key' in '$short_name' EOM } else { ## ok - no error seen } $rkeep_break_hash->{$key} = $flag; } if ( DEBUG_KB && @list ) { my @tmp = %flags; local $LIST_SEPARATOR = SPACE; print <<EOM; DEBUG_KB -$short_name flag: $str final keys: @list special flags: @tmp EOM } return; } ## end sub initialize_keep_old_breakpoints sub initialize_global_option_vars { #------------------------------------------------------------ # Make global vars for frequently used options for efficiency #------------------------------------------------------------ $rOpts_add_newlines = $rOpts->{'add-newlines'}; $rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'}; $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; $rOpts_blank_lines_after_opening_block = $rOpts->{'blank-lines-after-opening-block'}; $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; $rOpts_block_brace_vertical_tightness = $rOpts->{'block-brace-vertical-tightness'}; $rOpts_brace_follower_vertical_tightness = $rOpts->{'brace-follower-vertical-tightness'}; $rOpts_break_after_labels = $rOpts->{'break-after-labels'}; $rOpts_break_at_old_attribute_breakpoints = $rOpts->{'break-at-old-attribute-breakpoints'}; $rOpts_break_at_old_comma_breakpoints = $rOpts->{'break-at-old-comma-breakpoints'}; $rOpts_break_at_old_keyword_breakpoints = $rOpts->{'break-at-old-keyword-breakpoints'}; $rOpts_break_at_old_logical_breakpoints = $rOpts->{'break-at-old-logical-breakpoints'}; $rOpts_break_at_old_semicolon_breakpoints = $rOpts->{'break-at-old-semicolon-breakpoints'}; $rOpts_break_at_old_ternary_breakpoints = $rOpts->{'break-at-old-ternary-breakpoints'}; $rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'}; $rOpts_closing_side_comments = $rOpts->{'closing-side-comments'}; $rOpts_closing_side_comment_else_flag = $rOpts->{'closing-side-comment-else-flag'}; $rOpts_closing_side_comment_maximum_text = $rOpts->{'closing-side-comment-maximum-text'}; $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; $rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'}; $rOpts_delete_closing_side_comments = $rOpts->{'delete-closing-side-comments'}; $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; $rOpts_extended_continuation_indentation = $rOpts->{'extended-continuation-indentation'}; $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'}; $rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'}; $rOpts_delete_weld_interfering_commas = $rOpts->{'delete-weld-interfering-commas'}; $rOpts_format_skipping = $rOpts->{'format-skipping'}; $rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'}; $rOpts_function_paren_vertical_alignment = $rOpts->{'function-paren-vertical-alignment'}; $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; $rOpts_ignore_side_comment_lengths = $rOpts->{'ignore-side-comment-lengths'}; $rOpts_ignore_perlcritic_comments = $rOpts->{'ignore-perlcritic-comments'}; $rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'}; $rOpts_indent_columns = $rOpts->{'indent-columns'}; $rOpts_indent_only = $rOpts->{'indent-only'}; $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; $rOpts_extended_block_tightness = $rOpts->{'extended-block-tightness'}; $rOpts_extended_line_up_parentheses = $rOpts->{'extended-line-up-parentheses'}; $rOpts_logical_padding = $rOpts->{'logical-padding'}; $rOpts_maximum_consecutive_blank_lines = $rOpts->{'maximum-consecutive-blank-lines'}; $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'}; $rOpts_opening_brace_always_on_right = $rOpts->{'opening-brace-always-on-right'}; $rOpts_outdent_keywords = $rOpts->{'outdent-keywords'}; $rOpts_outdent_labels = $rOpts->{'outdent-labels'}; $rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'}; $rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'}; $rOpts_outdent_static_block_comments = $rOpts->{'outdent-static-block-comments'}; $rOpts_recombine = $rOpts->{'recombine'}; $rOpts_short_concatenation_item_length = $rOpts->{'short-concatenation-item-length'}; $rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'}; $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'}; $rOpts_static_block_comments = $rOpts->{'static-block-comments'}; $rOpts_add_missing_else = $rOpts->{'add-missing-else'}; $rOpts_warn_missing_else = $rOpts->{'warn-missing-else'}; $rOpts_tee_block_comments = $rOpts->{'tee-block-comments'}; $rOpts_tee_pod = $rOpts->{'tee-pod'}; $rOpts_tee_side_comments = $rOpts->{'tee-side-comments'}; $rOpts_valign_code = $rOpts->{'valign-code'}; $rOpts_valign_side_comments = $rOpts->{'valign-side-comments'}; $rOpts_valign_if_unless = $rOpts->{'valign-if-unless'}; $rOpts_variable_maximum_line_length = $rOpts->{'variable-maximum-line-length'}; # Note that both opening and closing tokens can access the opening # and closing flags of their container types. %opening_vertical_tightness = ( '(' => $rOpts->{'paren-vertical-tightness'}, '{' => $rOpts->{'brace-vertical-tightness'}, '[' => $rOpts->{'square-bracket-vertical-tightness'}, ')' => $rOpts->{'paren-vertical-tightness'}, '}' => $rOpts->{'brace-vertical-tightness'}, ']' => $rOpts->{'square-bracket-vertical-tightness'}, ); %closing_vertical_tightness = ( '(' => $rOpts->{'paren-vertical-tightness-closing'}, '{' => $rOpts->{'brace-vertical-tightness-closing'}, '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, ')' => $rOpts->{'paren-vertical-tightness-closing'}, '}' => $rOpts->{'brace-vertical-tightness-closing'}, ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, ); # assume flag for '>' same as ')' for closing qw quotes %closing_token_indentation = ( ')' => $rOpts->{'closing-paren-indentation'}, '}' => $rOpts->{'closing-brace-indentation'}, ']' => $rOpts->{'closing-square-bracket-indentation'}, '>' => $rOpts->{'closing-paren-indentation'}, ); # flag indicating if any closing tokens are indented $some_closing_token_indentation = $rOpts->{'closing-paren-indentation'} || $rOpts->{'closing-brace-indentation'} || $rOpts->{'closing-square-bracket-indentation'} || $rOpts->{'indent-closing-brace'}; %opening_token_right = ( '(' => $rOpts->{'opening-paren-right'}, '{' => $rOpts->{'opening-hash-brace-right'}, '[' => $rOpts->{'opening-square-bracket-right'}, ); %stack_opening_token = ( '(' => $rOpts->{'stack-opening-paren'}, '{' => $rOpts->{'stack-opening-hash-brace'}, '[' => $rOpts->{'stack-opening-square-bracket'}, ); %stack_closing_token = ( ')' => $rOpts->{'stack-closing-paren'}, '}' => $rOpts->{'stack-closing-hash-brace'}, ']' => $rOpts->{'stack-closing-square-bracket'}, ); return; } ## end sub initialize_global_option_vars sub initialize_line_length_vars { # Create a table of maximum line length vs level for later efficient use. # We will make the tables very long to be sure it will not be exceeded. # But we have to choose a fixed length. A check will be made at the start # of sub 'finish_formatting' to be sure it is not exceeded. Note, some of # my standard test problems have indentation levels of about 150, so this # should be fairly large. If the choice of a maximum level ever becomes # an issue then these table values could be returned in a sub with a simple # memoization scheme. # Also create a table of the maximum spaces available for text due to the # level only. If a line has continuation indentation, then that space must # be subtracted from the table value. This table is used for preliminary # estimates in welding, extended_ci, BBX, and marking short blocks. use constant LEVEL_TABLE_MAX => 1000; # The basic scheme: foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { my $indent = $level * $rOpts_indent_columns; $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length; $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length - $indent; } # Correct the maximum_text_length table if the -wc=n flag is used $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'}; if ($rOpts_whitespace_cycle) { if ( $rOpts_whitespace_cycle > 0 ) { foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { my $level_mod = $level % $rOpts_whitespace_cycle; my $indent = $level_mod * $rOpts_indent_columns; $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length - $indent; } } else { $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0; } } # Correct the tables if the -vmll flag is used. These values override the # previous values. if ($rOpts_variable_maximum_line_length) { foreach my $level ( 0 .. LEVEL_TABLE_MAX ) { $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length; $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length + $level * $rOpts_indent_columns; } } # Define two measures of indentation level, alpha and beta, at which some # formatting features come under stress and need to start shutting down. # Some combination of the two will be used to shut down different # formatting features. # Put a reasonable upper limit on stress level (say 100) in case the # whitespace-cycle variable is used. my $stress_level_limit = min( 100, LEVEL_TABLE_MAX ); # Find stress_level_alpha, targeted at very short maximum line lengths. $stress_level_alpha = $stress_level_limit + 1; foreach my $level_test ( 0 .. $stress_level_limit ) { my $max_len = $maximum_text_length_at_level[ $level_test + 1 ]; my $excess_inside_space = $max_len - $rOpts_continuation_indentation - $rOpts_indent_columns - 8; if ( $excess_inside_space <= 0 ) { $stress_level_alpha = $level_test; last; } } # Find stress level beta, a stress level targeted at formatting # at deep levels near the maximum line length. We start increasing # from zero and stop at the first level which shows no more space. # 'const' is a fixed number of spaces for a typical variable. # Cases b1197-b1204 work ok with const=12 but not with const=8 my $const = 16; my $denom = max( 1, $rOpts_indent_columns ); $stress_level_beta = 0; foreach my $level ( 0 .. $stress_level_limit ) { my $remaining_cycles = max( 0, ( $maximum_text_length_at_level[$level] - $rOpts_continuation_indentation - $const ) / $denom ); last if ( $remaining_cycles <= 3 ); # 2 does not work $stress_level_beta = $level; } # This is a combined level which works well for turning off formatting # features in most cases: $high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 ); return; } ## end sub initialize_line_length_vars sub initialize_trailing_comma_rules { # Setup control hash for trailing commas # -wtc=s defines desired trailing comma policy: # # =" " stable # [ both -atc and -dtc ignored ] # =0 : none # [requires -dtc; -atc ignored] # =1 or * : all # [requires -atc; -dtc ignored] # =m : multiline lists require trailing comma # if -atc set => will add missing multiline trailing commas # if -dtc set => will delete trailing single line commas # =b or 'bare' (multiline) lists require trailing comma # if -atc set => will add missing bare trailing commas # if -dtc set => will delete non-bare trailing commas # =h or 'hash': single column stable bare lists require trailing comma # if -atc set will add these # if -dtc set will delete other trailing commas #------------------------------------------------------------------- # This routine must be called after the alpha and beta stress levels # have been defined in sub 'initialize_line_length_vars'. #------------------------------------------------------------------- %trailing_comma_rules = (); my $rvalid_flags = [qw(0 1 * m b h i)]; my $option = $rOpts->{'want-trailing-commas'}; if ($option) { $option =~ s/^\s+//; $option =~ s/\s+$//; } # We need to use length() here because '0' is a possible option if ( defined($option) && length($option) ) { my $error_message; my %rule_hash; my @q = @{$rvalid_flags}; my %is_valid_flag; @is_valid_flag{@q} = (1) x scalar(@q); # handle single character control, such as -wtc='b' if ( length($option) == 1 ) { foreach (qw< ) ] } >) { $rule_hash{$_} = [ $option, EMPTY_STRING ]; } } # handle multi-character control(s), such as -wtc='[m' or -wtc='k(m' else { my @parts = split /\s+/, $option; foreach my $part (@parts) { if ( length($part) >= 2 && length($part) <= 3 ) { my $val = substr( $part, -1, 1 ); my $key_o = substr( $part, -2, 1 ); if ( $is_opening_token{$key_o} ) { my $paren_flag = EMPTY_STRING; if ( length($part) == 3 ) { $paren_flag = substr( $part, 0, 1 ); } my $key = $matching_token{$key_o}; $rule_hash{$key} = [ $val, $paren_flag ]; } else { $error_message .= "Unrecognized term: '$part'\n"; } } else { $error_message .= "Unrecognized term: '$part'\n"; } } } # check for valid control characters if ( !$error_message ) { foreach my $key ( keys %rule_hash ) { my $item = $rule_hash{$key}; my ( $val, $paren_flag ) = @{$item}; if ( $val && !$is_valid_flag{$val} ) { my $valid_str = join( SPACE, @{$rvalid_flags} ); $error_message .= "Unexpected value '$val'; must be one of: $valid_str\n"; last; } if ($paren_flag) { if ( $paren_flag !~ /^[kKfFwW]$/ ) { $error_message .= "Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n"; last; } if ( $key ne ')' ) { $error_message .= "paren flag '$paren_flag' is only allowed before a '('\n"; last; } } } } if ($error_message) { Warn(<<EOM); Error parsing --want-trailing-commas='$option': $error_message EOM } # Set the control hash if no errors else { %trailing_comma_rules = %rule_hash; } } # Both adding and deleting commas can lead to instability in extreme cases if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) { # If the possible instability is significant, then we can turn off # -dtc as a defensive measure to prevent it. # We must turn off -dtc for very small values of --whitespace-cycle # to avoid instability. A minimum value of -wc=3 fixes b1393, but a # value of 4 is used here for safety. This parameter is seldom used, # and much larger than this when used, so the cutoff value is not # critical. if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) { $rOpts_delete_trailing_commas = 0; } } return; } ## end sub initialize_trailing_comma_rules sub initialize_whitespace_hashes { # This is called once before formatting begins to initialize these global # hashes, which control the use of whitespace around tokens: # # %binary_ws_rules # %want_left_space # %want_right_space # %space_after_keyword # # Many token types are identical to the tokens themselves. # See the tokenizer for a complete list. Here are some special types: # k = perl keyword # f = semicolon in for statement # m = unary minus # p = unary plus # Note that :: is excluded since it should be contained in an identifier # Note that '->' is excluded because it never gets space # parentheses and brackets are excluded since they are handled specially # curly braces are included but may be overridden by logic, such as # newline logic. # NEW_TOKENS: create a whitespace rule here. This can be as # simple as adding your new letter to @spaces_both_sides, for # example. # fix for c250: added space rules new package type 'P' and sub type 'S' my @spaces_both_sides = qw# + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ &&= ||= //= <=> A k f w F n C Y U G v P S #; my @spaces_left_side = qw< t ! ~ m p { \ h pp mm Z j >; push( @spaces_left_side, '#' ); # avoids warning message my @spaces_right_side = qw< ; } ) ] R J ++ -- **= >; push( @spaces_right_side, ',' ); # avoids warning message %want_left_space = (); %want_right_space = (); %binary_ws_rules = (); # Note that we setting defaults here. Later in processing # the values of %want_left_space and %want_right_space # may be overridden by any user settings specified by the # -wls and -wrs parameters. However the binary_whitespace_rules # are hardwired and have priority. @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @want_right_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); @want_left_space{@spaces_right_side} = (-1) x scalar(@spaces_right_side); @want_right_space{@spaces_right_side} = (1) x scalar(@spaces_right_side); $want_left_space{'->'} = WS_NO; $want_right_space{'->'} = WS_NO; $want_left_space{'**'} = WS_NO; $want_right_space{'**'} = WS_NO; $want_right_space{'CORE::'} = WS_NO; # These binary_ws_rules are hardwired and have priority over the above # settings. It would be nice to allow adjustment by the user, # but it would be complicated to specify. # # hash type information must stay tightly bound # as in : ${xxxx} $binary_ws_rules{'i'}{'L'} = WS_NO; $binary_ws_rules{'i'}{'{'} = WS_YES; $binary_ws_rules{'k'}{'{'} = WS_YES; $binary_ws_rules{'U'}{'{'} = WS_YES; $binary_ws_rules{'i'}{'['} = WS_NO; $binary_ws_rules{'R'}{'L'} = WS_NO; $binary_ws_rules{'R'}{'{'} = WS_NO; $binary_ws_rules{'t'}{'L'} = WS_NO; $binary_ws_rules{'t'}{'{'} = WS_NO; $binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123 $binary_ws_rules{'}'}{'L'} = WS_NO; $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO $binary_ws_rules{'$'}{'L'} = WS_NO; $binary_ws_rules{'$'}{'{'} = WS_NO; $binary_ws_rules{'@'}{'L'} = WS_NO; $binary_ws_rules{'@'}{'{'} = WS_NO; $binary_ws_rules{'='}{'L'} = WS_YES; $binary_ws_rules{'J'}{'J'} = WS_YES; # the following includes ') {' # as in : if ( xxx ) { yyy } $binary_ws_rules{']'}{'L'} = WS_NO; $binary_ws_rules{']'}{'{'} = WS_NO; $binary_ws_rules{')'}{'{'} = WS_YES; $binary_ws_rules{')'}{'['} = WS_NO; $binary_ws_rules{']'}{'['} = WS_NO; $binary_ws_rules{']'}{'{'} = WS_NO; $binary_ws_rules{'}'}{'['} = WS_NO; $binary_ws_rules{'R'}{'['} = WS_NO; $binary_ws_rules{']'}{'++'} = WS_NO; $binary_ws_rules{']'}{'--'} = WS_NO; $binary_ws_rules{')'}{'++'} = WS_NO; $binary_ws_rules{')'}{'--'} = WS_NO; $binary_ws_rules{'R'}{'++'} = WS_NO; $binary_ws_rules{'R'}{'--'} = WS_NO; $binary_ws_rules{'i'}{'Q'} = WS_YES; $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' $binary_ws_rules{'i'}{'('} = WS_NO; $binary_ws_rules{'w'}{'('} = WS_NO; $binary_ws_rules{'w'}{'{'} = WS_YES; return; } ## end sub initialize_whitespace_hashes { #<<< begin closure set_whitespace_flags my %is_special_ws_type; my %is_wCUG; my %is_wi; BEGIN { # The following hash is used to skip over needless if tests. # Be sure to update it when adding new checks in its block. my @q = qw(k w C m - Q); push @q, '#'; @is_special_ws_type{@q} = (1) x scalar(@q); # These hashes replace slower regex tests @q = qw( w C U G ); @is_wCUG{@q} = (1) x scalar(@q); @q = qw( w i ); @is_wi{@q} = (1) x scalar(@q); } ## end BEGIN use constant DEBUG_WHITE => 0; # Hashes to set spaces around container tokens according to their # sequence numbers. These are set as keywords are examined. # They are controlled by the -kpit and -kpitl flags. my %opening_container_inside_ws; my %closing_container_inside_ws; sub set_whitespace_flags { # This routine is called once per file to set whitespace flags for that # file. This routine examines each pair of nonblank tokens and sets a flag # indicating if white space is needed. # # $rwhitespace_flags->[$j] is a flag indicating whether a white space # BEFORE token $j is needed, with the following values: # # WS_NO = -1 do not want a space BEFORE token $j # WS_OPTIONAL= 0 optional space or $j is a whitespace # WS_YES = 1 want a space BEFORE token $j # my $self = shift; my $j_tight_closing_paren = -1; my $rLL = $self->[_rLL_]; my $jmax = @{$rLL} - 1; %opening_container_inside_ws = (); %closing_container_inside_ws = (); my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'}; my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'}; my $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; my $rwhitespace_flags = []; my $ris_function_call_paren = {}; return $rwhitespace_flags if ( $jmax < 0 ); my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 ); my $last_token = SPACE; my $last_type = 'b'; my $last_token_dbg = SPACE; my $last_type_dbg = 'b'; my $rtokh_last = [ @{ $rLL->[0] } ]; $rtokh_last->[_TOKEN_] = $last_token; $rtokh_last->[_TYPE_] = $last_type; $rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING; $rtokh_last->[_LINE_INDEX_] = 0; my $rtokh_last_last = $rtokh_last; # This will identify braces to be treated as blocks for the -xbt flag my %block_type_for_tightness; my ( $ws_1, $ws_2, $ws_3, $ws_4 ); # main loop over all tokens to define the whitespace flags my $last_type_is_opening; my ( $token, $type ); my $j = -1; foreach my $rtokh ( @{$rLL} ) { $j++; $type = $rtokh->[_TYPE_]; if ( $type eq 'b' ) { $rwhitespace_flags->[$j] = WS_OPTIONAL; next; } $token = $rtokh->[_TOKEN_]; my $ws; #--------------------------------------------------------------- # Whitespace Rules Section 1: # Handle space on the inside of opening braces. #--------------------------------------------------------------- # /^[L\{\(\[]$/ if ($last_type_is_opening) { $last_type_is_opening = 0; my $seqno = $rtokh->[_TYPE_SEQUENCE_]; my $block_type = $rblock_type_of_seqno->{$seqno}; my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_]; my $last_block_type = $rblock_type_of_seqno->{$last_seqno} || $block_type_for_tightness{$last_seqno}; $j_tight_closing_paren = -1; # let us keep empty matched braces together: () {} [] # except for BLOCKS if ( $token eq $matching_token{$last_token} ) { if ($block_type) { $ws = WS_YES; } else { $ws = WS_NO; } } else { # we're considering the right of an opening brace # tightness = 0 means always pad inside with space # tightness = 1 means pad inside if "complex" # tightness = 2 means never pad inside with space my $tightness; if ( $last_block_type && $last_token eq '{' ) { $tightness = $rOpts_block_brace_tightness; } else { $tightness = $tightness{$last_token} } #============================================================= # Patch for test problem <<snippets/fabrice_bug.in>> # We must always avoid spaces around a bare word beginning # with ^ as in: # my $before = ${^PREMATCH}; # Because all of the following cause an error in perl: # my $before = ${ ^PREMATCH }; # my $before = ${ ^PREMATCH}; # my $before = ${^PREMATCH }; # So if brace tightness flag is -bt=0 we must temporarily reset # to bt=1. Note that here we must set tightness=1 and not 2 so # that the closing space is also avoided # (via the $j_tight_closing_paren flag in coding) if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 } #============================================================= if ( $tightness <= 0 ) { $ws = WS_YES; } elsif ( $tightness > 1 ) { $ws = WS_NO; } else { # find the index of the closing token my $j_closing = $self->[_K_closing_container_]->{$last_seqno}; # If the closing token is less than five characters ahead # we must take a closer look if ( defined($j_closing) && $j_closing - $j < 5 && $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq $last_seqno ) { $ws = ws_in_container( $j, $j_closing, $rLL, $type, $token, $last_token ); if ( $ws == WS_NO ) { $j_tight_closing_paren = $j_closing; } } else { $ws = WS_YES; } } } # check for special cases which override the above rules if ( %opening_container_inside_ws && $last_seqno ) { my $ws_override = $opening_container_inside_ws{$last_seqno}; if ($ws_override) { $ws = $ws_override } } $ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws if DEBUG_WHITE; } ## end setting space flag inside opening tokens #--------------------------------------------------------------- # Whitespace Rules Section 2: # Special checks for certain types ... #--------------------------------------------------------------- # The hash '%is_special_ws_type' significantly speeds up this routine, # but be sure to update it if a new check is added. # Currently has types: qw(k w C m - Q #) if ( $is_special_ws_type{$type} ) { if ( $type eq 'k' ) { # Keywords 'for', 'foreach' are special cases for -kpit since # the opening paren does not always immediately follow the # keyword. So we have to search forward for the paren in this # case. I have limited the search to 10 tokens ahead, just in # case somebody has a big file and no opening paren. This # should be enough for all normal code. Added the level check # to fix b1236. if ( $is_for_foreach{$token} && %keyword_paren_inner_tightness && defined( $keyword_paren_inner_tightness{$token} ) && $j < $jmax ) { my $level = $rLL->[$j]->[_LEVEL_]; my $jp = $j; ## NOTE: we might use the KNEXT variable to avoid this loop ## but profiling shows that little would be saved foreach my $inc ( 1 .. 9 ) { $jp++; last if ( $jp > $jmax ); last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236 next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' ); my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_]; set_container_ws_by_keyword( $token, $seqno_p ); last; } } } # handle a comment elsif ( $type eq '#' ) { # newline before block comment ($j==0), and # space before side comment ($j>0), so .. $ws = WS_YES; #--------------------------------- # Nothing more to do for a comment #--------------------------------- $rwhitespace_flags->[$j] = $ws; next; } # space_backslash_quote; RT #123774 <<snippets/rt123774.in>> # allow a space between a backslash and single or double quote # to avoid fooling html formatters elsif ( $type eq 'Q' ) { if ( $last_type eq '\\' && $token =~ /^[\"\']/ ) { $ws = !$rOpts_space_backslash_quote ? WS_NO : $rOpts_space_backslash_quote == 1 ? WS_OPTIONAL : $rOpts_space_backslash_quote == 2 ? WS_YES : WS_YES; } } # retain any space between '-' and bare word elsif ( $type eq 'w' || $type eq 'C' ) { $ws = WS_OPTIONAL if $last_type eq '-'; } # retain any space between '-' and bare word; for example # avoid space between 'USER' and '-' here: <<snippets/space2.in>> # $myhash{USER-NAME}='steve'; elsif ( $type eq 'm' || $type eq '-' ) { $ws = WS_OPTIONAL if ( $last_type eq 'w' ); } else { # A type $type was entered in %is_special_ws_type but # there is no code block to handle it. Either remove it # from the hash or add a code block to handle it. DEVEL_MODE && Fault("no code to handle type $type\n"); } } ## end elsif ( $is_special_ws_type{$type} ... #--------------------------------------------------------------- # Whitespace Rules Section 3: # Handle space on inside of closing brace pairs. #--------------------------------------------------------------- # /[\}\)\]R]/ elsif ( $is_closing_type{$type} ) { my $seqno = $rtokh->[_TYPE_SEQUENCE_]; if ( $j == $j_tight_closing_paren ) { $j_tight_closing_paren = -1; $ws = WS_NO; } else { if ( !defined($ws) ) { my $tightness; my $block_type = $rblock_type_of_seqno->{$seqno} || $block_type_for_tightness{$seqno}; if ( $block_type && $token eq '}' ) { $tightness = $rOpts_block_brace_tightness; } else { $tightness = $tightness{$token} } $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; } } # check for special cases which override the above rules if ( %closing_container_inside_ws && $seqno ) { my $ws_override = $closing_container_inside_ws{$seqno}; if ($ws_override) { $ws = $ws_override } } $ws_4 = $ws_3 = $ws_2 = $ws if DEBUG_WHITE; } ## end setting space flag inside closing tokens #--------------------------------------------------------------- # Whitespace Rules Section 4: #--------------------------------------------------------------- # /^[L\{\(\[]$/ elsif ( $is_opening_type{$type} ) { $last_type_is_opening = 1; if ( $token eq '(' ) { my $seqno = $rtokh->[_TYPE_SEQUENCE_]; # This will have to be tweaked as tokenization changes. # We usually want a space at '} (', for example: # <<snippets/space1.in>> # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); # # But not others: # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); # At present, the above & block is marked as type L/R so this # case won't go through here. if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES } # NOTE: some older versions of Perl had occasional problems if # spaces are introduced between keywords or functions and # opening parens. So the default is not to do this except is # certain cases. The current Perl seems to tolerate spaces. # Space between keyword and '(' elsif ( $last_type eq 'k' ) { $ws = WS_NO unless ( $rOpts_space_keyword_paren || $space_after_keyword{$last_token} ); # Set inside space flag if requested set_container_ws_by_keyword( $last_token, $seqno ); } # Space between function and '(' # ----------------------------------------------------- # 'w' and 'i' checks for something like: # myfun( &myfun( ->myfun( # ----------------------------------------------------- # Note that at this point an identifier may still have a # leading arrow, but the arrow will be split off during token # respacing. After that, the token may become a bare word # without leading arrow. The point is, it is best to mark # function call parens right here before that happens. # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()' # NOTE: this would be the place to allow spaces between # repeated parens, like () () (), as in case c017, but I # decided that would not be a good idea. # Updated to allow detached '->' from tokenizer (issue c140) elsif ( # /^[wCUG]$/ $is_wCUG{$last_type} || ( # /^[wi]$/ $is_wi{$last_type} && ( # with prefix '->' or '&' $last_token =~ /^([\&]|->)/ # or preceding token '->' (see b1337; c140) || $rtokh_last_last->[_TYPE_] eq '->' # or preceding sub call operator token '&' || ( $rtokh_last_last->[_TYPE_] eq 't' && $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ ) ) ) ) { $ws = $rOpts_space_function_paren ? $self->ws_space_function_paren( $j, $rtokh_last_last ) : WS_NO; set_container_ws_by_keyword( $last_token, $seqno ); $ris_function_call_paren->{$seqno} = 1; } # space between something like $i and ( in 'snippets/space2.in' # for $i ( 0 .. 20 ) { elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { $ws = WS_YES; } # allow constant function followed by '()' to retain no space elsif ($last_type eq 'C' && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' ) { $ws = WS_NO; } else { # ok - opening paren not covered by a special rule } } # patch for SWITCH/CASE: make space at ']{' optional # since the '{' might begin a case or when block elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { $ws = WS_OPTIONAL; } else { # ok - opening type not covered by a special rule } # keep space between 'sub' and '{' for anonymous sub definition, # be sure type = 'k' (added for c140) if ( $type eq '{' ) { if ( $last_token eq 'sub' && $last_type eq 'k' ) { $ws = WS_YES; } # this is needed to avoid no space in '){' if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } # avoid any space before the brace or bracket in something like # @opts{'a','b',...} if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { $ws = WS_NO; } } # The --extended-block-tightness option allows certain braces # to be treated as blocks just for setting inner whitespace if ( $rOpts_extended_block_tightness && $token eq '{' ) { my $seqno = $rtokh->[_TYPE_SEQUENCE_]; if ( !$rblock_type_of_seqno->{$seqno} && $extended_block_tightness_list{$last_token} ) { # Ok - make this brace a block type for tightness only $block_type_for_tightness{$seqno} = $last_token; } } } ## end elsif ( $is_opening_type{$type} ) { else { # ok: $type not opening, closing, or covered by a special rule } # always preserve whatever space was used after a possible # filehandle (except _) or here doc operator if ( ( ( $last_type eq 'Z' && $last_token ne '_' ) || $last_type eq 'h' ) && $type ne '#' # no longer required due to early exit for '#' above ) { # no space for '$ {' even if '$' is marked as type 'Z', issue c221 if ( $last_type eq 'Z' && $last_token eq '$' && $token eq '{' ) { $ws = WS_NO; } else { $ws = WS_OPTIONAL; } } $ws_4 = $ws_3 = $ws if DEBUG_WHITE; if ( !defined($ws) ) { #--------------------------------------------------------------- # Whitespace Rules Section 4: # Use the binary rule table. #--------------------------------------------------------------- if ( defined( $binary_ws_rules{$last_type}{$type} ) ) { $ws = $binary_ws_rules{$last_type}{$type}; $ws_4 = $ws if DEBUG_WHITE; } #--------------------------------------------------------------- # Whitespace Rules Section 5: # Apply default rules not covered above. #--------------------------------------------------------------- # If we fall through to here, look at the pre-defined hash tables # for the two tokens, and: # if (they are equal) use the common value # if (either is zero or undef) use the other # if (either is -1) use it # That is, # left vs right # 1 vs 1 --> 1 # 0 vs 0 --> 0 # -1 vs -1 --> -1 # # 0 vs -1 --> -1 # 0 vs 1 --> 1 # 1 vs 0 --> 1 # -1 vs 0 --> -1 # # -1 vs 1 --> -1 # 1 vs -1 --> -1 else { my $wl = $want_left_space{$type}; my $wr = $want_right_space{$last_type}; if ( !defined($wl) ) { $ws = defined($wr) ? $wr : 0; } elsif ( !defined($wr) ) { $ws = $wl; } else { $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; } } } # Treat newline as a whitespace. Otherwise, we might combine # 'Send' and '-recipients' here according to the above rules: # <<snippets/space3.in>> # my $msg = new Fax::Send # -recipients => $to, # -data => $data; if ( !$ws && $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] ) { $ws = WS_YES; } $rwhitespace_flags->[$j] = $ws; # remember non-blank, non-comment tokens $last_token = $token; $last_type = $type; $rtokh_last_last = $rtokh_last; $rtokh_last = $rtokh; # Programming note: for some reason, it is very much faster to 'next' # out of this loop here than to put the DEBUG coding in a block. # But note that the debug code must then update its own copies # of $last_token and $last_type. next if ( !DEBUG_WHITE ); my $str = substr( $last_token_dbg, 0, 15 ); $str .= SPACE x ( 16 - length($str) ); if ( !defined($ws_1) ) { $ws_1 = "*" } if ( !defined($ws_2) ) { $ws_2 = "*" } if ( !defined($ws_3) ) { $ws_3 = "*" } if ( !defined($ws_4) ) { $ws_4 = "*" } print {*STDOUT} "NEW WHITE: i=$j $str $last_type_dbg $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n"; # reset for next pass $ws_1 = $ws_2 = $ws_3 = $ws_4 = undef; $last_token_dbg = $token; $last_type_dbg = $type; } ## end main loop if ( $rOpts->{'tight-secret-operators'} ) { new_secret_operator_whitespace( $rLL, $rwhitespace_flags ); } $self->[_ris_function_call_paren_] = $ris_function_call_paren; return $rwhitespace_flags; } ## end sub set_whitespace_flags sub set_container_ws_by_keyword { my ( $word, $sequence_number ) = @_; return unless (%keyword_paren_inner_tightness); # We just saw a keyword (or other function name) followed by an opening # paren. Now check to see if the following paren should have special # treatment for its inside space. If so we set a hash value using the # sequence number as key. if ( $word && $sequence_number ) { my $tightness = $keyword_paren_inner_tightness{$word}; if ( defined($tightness) && $tightness != 1 ) { my $ws_flag = $tightness == 0 ? WS_YES : WS_NO; $opening_container_inside_ws{$sequence_number} = $ws_flag; $closing_container_inside_ws{$sequence_number} = $ws_flag; } } return; } ## end sub set_container_ws_by_keyword sub ws_in_container { my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_; # Given: # $j = index of token following an opening container token # $type, $token = the type and token at index $j # $j_closing = closing token of the container # $last_token = the opening token of the container # Return: # WS_NO if there is just one token in the container (with exceptions) # WS_YES otherwise #------------------------------------ # Look forward for the closing token; #------------------------------------ if ( $j + 1 > $j_closing ) { return WS_NO } # Patch to count '-foo' as single token so that # each of $a{-foo} and $a{foo} and $a{'foo'} do # not get spaces with default formatting. my $j_here = $j; ++$j_here if ( $token eq '-' && $last_token eq '{' && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' ); # Patch to count a sign separated from a number as a single token, as # in the following line. Otherwise, it takes two steps to converge: # deg2rad(- 0.5) if ( ( $type eq 'm' || $type eq 'p' ) && $j < $j_closing + 1 && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b' && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n' && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ ) { $j_here = $j + 2; } # $j_next is where a closing token should be if the container has # just a "single" token if ( $j_here + 1 > $j_closing ) { return WS_NO } my $j_next = ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' ) ? $j_here + 2 : $j_here + 1; #----------------------------------------------------------------- # Now decide: if we get to the closing token we will keep it tight #----------------------------------------------------------------- if ( $j_next == $j_closing # OLD PROBLEM: but watch out for this: [ [ ] (misc.t) # No longer necessary because of the previous check on sequence numbers ##&& $last_token ne $token # double diamond is usually spaced && $token ne '<<>>' ) { return WS_NO; } return WS_YES; } ## end sub ws_in_container sub ws_space_function_paren { my ( $self, $j, $rtokh_last_last ) = @_; # Called if --space-function-paren is set to see if it might cause # a problem. The manual warns the user about potential problems with # this flag. Here we just try to catch one common problem. # Given: # $j = index of '(' after function name # Return: # WS_NO if no space # WS_YES otherwise # This was added to fix for issue c166. Ignore -sfp at a possible indirect # object location. For example, do not convert this: # print header() ... # to this: # print header () ... # because in this latter form, header may be taken to be a file handle # instead of a function call. # Start with the normal value for -sfp: my $ws = WS_YES; # now check to be sure we don't cause a problem: my $type_ll = $rtokh_last_last->[_TYPE_]; my $tok_ll = $rtokh_last_last->[_TOKEN_]; # NOTE: this is just a minimal check. For example, we might also check # for something like this: # print ( header ( .. if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) { $ws = WS_NO; } return $ws; } ## end sub ws_space_function_paren } ## end closure set_whitespace_flags sub dump_want_left_space { my $fh = shift; local $LIST_SEPARATOR = "\n"; $fh->print(<<EOM); These values are the main control of whitespace to the left of a token type; They may be altered with the -wls parameter. For a list of token types, use perltidy --dump-token-types (-dtt) 1 means the token wants a space to its left -1 means the token does not want a space to its left ------------------------------------------------------------------------ EOM foreach my $key ( sort keys %want_left_space ) { $fh->print("$key\t$want_left_space{$key}\n"); } return; } ## end sub dump_want_left_space sub dump_want_right_space { my $fh = shift; local $LIST_SEPARATOR = "\n"; $fh->print(<<EOM); These values are the main control of whitespace to the right of a token type; They may be altered with the -wrs parameter. For a list of token types, use perltidy --dump-token-types (-dtt) 1 means the token wants a space to its right -1 means the token does not want a space to its right ------------------------------------------------------------------------ EOM foreach my $key ( sort keys %want_right_space ) { $fh->print("$key\t$want_right_space{$key}\n"); } return; } ## end sub dump_want_right_space { ## begin closure is_essential_whitespace my %is_sort_grep_map; my %is_for_foreach; my %is_digraph; my %is_trigraph; my %essential_whitespace_filter_l1; my %essential_whitespace_filter_r1; my %essential_whitespace_filter_l2; my %essential_whitespace_filter_r2; my %is_type_with_space_before_bareword; my %is_special_variable_char; BEGIN { my @q; # NOTE: This hash is like the global %is_sort_map_grep, but it ignores # grep aliases on purpose, since here we are looking parens, not braces @q = qw(sort grep map); @is_sort_grep_map{@q} = (1) x scalar(@q); @q = qw(for foreach); @is_for_foreach{@q} = (1) x scalar(@q); @q = qw( .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^. ); @is_digraph{@q} = (1) x scalar(@q); @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~); @is_trigraph{@q} = (1) x scalar(@q); # These are used as a speedup filters for sub is_essential_whitespace. # Filter 1: # These left side token types USUALLY do not require a space: @q = qw( ; { } [ ] L R ); push @q, ','; push @q, ')'; push @q, '('; @essential_whitespace_filter_l1{@q} = (1) x scalar(@q); # BUT some might if followed by these right token types @q = qw( pp mm << <<= h ); @essential_whitespace_filter_r1{@q} = (1) x scalar(@q); # Filter 2: # These right side filters usually do not require a space @q = qw( ; ] R } ); push @q, ','; push @q, ')'; @essential_whitespace_filter_r2{@q} = (1) x scalar(@q); # BUT some might if followed by these left token types @q = qw( h Z ); @essential_whitespace_filter_l2{@q} = (1) x scalar(@q); # Keep a space between certain types and any bareword: # Q: keep a space between a quote and a bareword to prevent the # bareword from becoming a quote modifier. # &: do not remove space between an '&' and a bare word because # it may turn into a function evaluation, like here # between '&' and 'O_ACCMODE', producing a syntax error [File.pm] # $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); @q = qw( Q & ); @is_type_with_space_before_bareword{@q} = (1) x scalar(@q); # These are the only characters which can (currently) form special # variables, like $^W: (issue c066, c068). @q = qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ }; @{is_special_variable_char}{@q} = (1) x scalar(@q); } ## end BEGIN sub is_essential_whitespace { # Essential whitespace means whitespace which cannot be safely deleted # without risking the introduction of a syntax error. # We are given three tokens and their types: # ($tokenl, $typel) is the token to the left of the space in question # ($tokenr, $typer) is the token to the right of the space in question # ($tokenll, $typell) is previous nonblank token to the left of $tokenl # # Note1: This routine should almost never need to be changed. It is # for avoiding syntax problems rather than for formatting. # Note2: The -mangle option causes large numbers of calls to this # routine and therefore is a good test. So if a change is made, be sure # to use nytprof to profile with both old and revised coding using the # -mangle option and check differences. my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_; # This is potentially a very slow routine but the following quick # filters typically catch and handle over 90% of the calls. # Filter 1: usually no space required after common types ; , [ ] { } ( ) return if ( $essential_whitespace_filter_l1{$typel} && !$essential_whitespace_filter_r1{$typer} ); # Filter 2: usually no space before common types ; , return if ( $essential_whitespace_filter_r2{$typer} && !$essential_whitespace_filter_l2{$typel} ); # Filter 3: Handle side comments: a space is only essential if the left # token ends in '$' For example, we do not want to create $#foo below: # sub t086 # ( #foo))) # $ #foo))) # a #foo))) # ) #foo))) # { ... } # Also, I prefer not to put a ? and # together because ? used to be # a pattern delimiter and spacing was used if guessing was needed. if ( $typer eq '#' ) { return 1 if ( $tokenl && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) ); return; } my $tokenr_is_bareword = $tokenr =~ /^\w/ && $tokenr !~ /^\d/; my $tokenr_is_open_paren = $tokenr eq '('; my $token_joined = $tokenl . $tokenr; my $tokenl_is_dash = $tokenl eq '-'; my $result = # never combine two bare words or numbers # examples: and ::ok(1) # return ::spw(...) # for bla::bla:: abc # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl # $input eq"quit" to make $inputeq"quit" # my $size=-s::SINK if $file; <==OK but we won't do it # don't join something like: for bla::bla:: abc # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl ( ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) ) # do not combine a number with a concatenation dot # example: pom.caputo: # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n"); || $typel eq 'n' && $tokenr eq '.' || $typer eq 'n' && $tokenl eq '.' # cases of a space before a bareword... || ( $tokenr_is_bareword && ( # do not join a minus with a bare word, because you might form # a file test operator. Example from Complex.pm: # if (CORE::abs($z - i) < $eps); # "z-i" would be taken as a file test. $tokenl_is_dash && length($tokenr) == 1 # and something like this could become ambiguous without space # after the '-': # use constant III=>1; # $a = $b - III; # and even this: # $a = - III; || $tokenl_is_dash && $typer =~ /^[wC]$/ # keep space between types Q & and a bareword || $is_type_with_space_before_bareword{$typel} # +-: binary plus and minus before a bareword could get # converted into unary plus and minus on next pass through the # tokenizer. This can lead to blinkers: cases b660 b670 b780 # b781 b787 b788 b790 So we keep a space unless the +/- clearly # follows an operator || ( ( $typel eq '+' || $typel eq '-' ) && $typell !~ /^[niC\)\}\]R]$/ ) # keep a space between a token ending in '$' and any word; # this caused trouble: "die @$ if $@" || $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$' # don't combine $$ or $# with any alphanumeric # (testfile mangle.t with --mangle) || $tokenl eq '$$' || $tokenl eq '$#' ) ) ## end $tokenr_is_bareword # OLD, not used # '= -' should not become =- or you will get a warning # about reversed -= # || ($tokenr eq '-') # do not join a bare word with a minus, like between 'Send' and # '-recipients' here <<snippets/space3.in>> # my $msg = new Fax::Send # -recipients => $to, # -data => $data; # This is the safest thing to do. If we had the token to the right of # the minus we could do a better check. # # And do not combine a bareword and a quote, like this: # oops "Your login, $Bad_Login, is not valid"; # It can cause a syntax error if oops is a sub || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' ) # perl is very fussy about spaces before << || substr( $tokenr, 0, 2 ) eq '<<' # avoid combining tokens to create new meanings. Example: # $a+ +$b must not become $a++$b || ( $is_digraph{$token_joined} ) || $is_trigraph{$token_joined} # another example: do not combine these two &'s: # allow_options & &OPT_EXECCGI || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } # retain any space after possible filehandle # (testfiles prnterr1.t with --extrude and mangle.t with --mangle) # but no space for '$ {' even if '$' is marked as type 'Z', issue c221 || ( $typel eq 'Z' && !( $tokenl eq '$' && $tokenr eq '{' ) ) # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing # space after type Y. Otherwise, it will get parsed as type 'Z' later # and any space would have to be added back manually if desired. || $typel eq 'Y' # Perl is sensitive to whitespace after the + here: # $b = xvals $a + 0.1 * yvals $a; || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ || ( $tokenr_is_open_paren && ( # keep paren separate in 'use Foo::Bar ()' ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' ) # OLD: keep any space between filehandle and paren: # file mangle.t with --mangle: # NEW: this test is no longer necessary here (moved above) ## || $typel eq 'Y' # must have space between grep and left paren; "grep(" will fail || $is_sort_grep_map{$tokenl} # don't stick numbers next to left parens, as in: #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm) || $typel eq 'n' ) ) ## end $tokenr_is_open_paren # retain any space after here doc operator ( hereerr.t) || $typel eq 'h' # be careful with a space around ++ and --, to avoid ambiguity as to # which token it applies || ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/ || ( $typel eq '++' || $typel eq '--' ) && $tokenr !~ /^[\;\}\)\]]/ # need space after foreach my; for example, this will fail in # older versions of Perl: # foreach my$ft(@filetypes)... || ( $tokenl eq 'my' && substr( $tokenr, 0, 1 ) eq '$' # /^(for|foreach)$/ && $is_for_foreach{$tokenll} ) # Keep space after like $^ if needed to avoid forming a different # special variable (issue c068). For example: # my $aa = $^ ? "none" : "ok"; || ( $typel eq 'i' && length($tokenl) == 2 && substr( $tokenl, 1, 1 ) eq '^' && $is_special_variable_char{ substr( $tokenr, 0, 1 ) } ) # We must be sure that a space between a ? and a quoted string # remains if the space before the ? remains. [Loca.pm, lockarea] # ie, # $b=join $comma ? ',' : ':', @_; # ok # $b=join $comma?',' : ':', @_; # ok! # $b=join $comma ?',' : ':', @_; # error! # Not really required: ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) ) # Space stacked labels... # Not really required: Perl seems to accept non-spaced labels. ## || $typel eq 'J' && $typer eq 'J' ; # the value of this long logic sequence is the result we want return $result; } ## end sub is_essential_whitespace } ## end closure is_essential_whitespace { ## begin closure new_secret_operator_whitespace my %secret_operators; my %is_leading_secret_token; BEGIN { # token lists for perl secret operators as compiled by Philippe Bruhat # at: https://metacpan.org/module/perlsecret %secret_operators = ( 'Goatse' => [qw#= ( ) =#], #=( )= 'Venus1' => [qw#0 +#], # 0+ 'Venus2' => [qw#+ 0#], # +0 'Enterprise' => [qw#) x ! !#], # ()x!! 'Kite1' => [qw#~ ~ <>#], # ~~<> 'Kite2' => [qw#~~ <>#], # ~~<> 'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=> 'Bang bang ' => [qw#! !#], # !! ); # The following operators and constants are not included because they # are normally kept tight by perltidy: # ~~ <~> # # Make a lookup table indexed by the first token of each operator: # first token => [list, list, ...] foreach my $value ( values(%secret_operators) ) { my $tok = $value->[0]; push @{ $is_leading_secret_token{$tok} }, $value; } } ## end BEGIN sub new_secret_operator_whitespace { my ( $rlong_array, $rwhitespace_flags ) = @_; # Loop over all tokens in this line my ( $token, $type ); my $jmax = @{$rlong_array} - 1; foreach my $j ( 0 .. $jmax ) { $token = $rlong_array->[$j]->[_TOKEN_]; $type = $rlong_array->[$j]->[_TYPE_]; # Skip unless this token might start a secret operator next if ( $type eq 'b' ); next unless ( $is_leading_secret_token{$token} ); # Loop over all secret operators with this leading token foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) { my $jend = $j - 1; foreach my $tok ( @{$rpattern} ) { $jend++; $jend++ if ( $jend <= $jmax && $rlong_array->[$jend]->[_TYPE_] eq 'b' ); if ( $jend > $jmax || $tok ne $rlong_array->[$jend]->[_TOKEN_] ) { $jend = undef; last; } } if ($jend) { # set flags to prevent spaces within this operator foreach my $jj ( $j + 1 .. $jend ) { $rwhitespace_flags->[$jj] = WS_NO; } $j = $jend; last; } } ## End Loop over all operators } ## End loop over all tokens return; } ## end sub new_secret_operator_whitespace } ## end closure new_secret_operator_whitespace { ## begin closure set_bond_strengths # These routines and variables are involved in deciding where to break very # long lines. # NEW_TOKENS must add bond strength rules my %is_good_keyword_breakpoint; my %is_container_token; my %binary_bond_strength_nospace; my %binary_bond_strength; my %nobreak_lhs; my %nobreak_rhs; my @bias_tokens; my %bias_hash; my %bias; my $delta_bias; sub initialize_bond_strength_hashes { my @q; @q = qw(if unless while until for foreach); @is_good_keyword_breakpoint{@q} = (1) x scalar(@q); @q = qw/ ( [ { } ] ) /; @is_container_token{@q} = (1) x scalar(@q); # The decision about where to break a line depends upon a "bond # strength" between tokens. The LOWER the bond strength, the MORE # likely a break. A bond strength may be any value but to simplify # things there are several pre-defined strength levels: # NO_BREAK => 10000; # VERY_STRONG => 100; # STRONG => 2.1; # NOMINAL => 1.1; # WEAK => 0.8; # VERY_WEAK => 0.55; # The strength values are based on trial-and-error, and need to be # tweaked occasionally to get desired results. Some comments: # # 1. Only relative strengths are important. small differences # in strengths can make big formatting differences. # 2. Each indentation level adds one unit of bond strength. # 3. A value of NO_BREAK makes an unbreakable bond # 4. A value of VERY_WEAK is the strength of a ',' # 5. Values below NOMINAL are considered ok break points. # 6. Values above NOMINAL are considered poor break points. # # The bond strengths should roughly follow precedence order where # possible. If you make changes, please check the results very # carefully on a variety of scripts. Testing with the -extrude # options is particularly helpful in exercising all of the rules. # Wherever possible, bond strengths are defined in the following # tables. There are two main stages to setting bond strengths and # two types of tables: # # The first stage involves looking at each token individually and # defining left and right bond strengths, according to if we want # to break to the left or right side, and how good a break point it # is. For example tokens like =, ||, && make good break points and # will have low strengths, but one might want to break on either # side to put them at the end of one line or beginning of the next. # # The second stage involves looking at certain pairs of tokens and # defining a bond strength for that particular pair. This second # stage has priority. #--------------------------------------------------------------- # Bond Strength BEGIN Section 1. # Set left and right bond strengths of individual tokens. #--------------------------------------------------------------- # NOTE: NO_BREAK's set in this section first are HINTS which will # probably not be honored. Essential NO_BREAKS's should be set in # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end # of this subroutine. # Note that we are setting defaults in this section. The user # cannot change bond strengths but can cause the left and right # bond strengths of any token type to be swapped through the use of # the -wba and -wbb flags. In this way the user can determine if a # breakpoint token should appear at the end of one line or the # beginning of the next line. %right_bond_strength = (); %left_bond_strength = (); %binary_bond_strength_nospace = (); %binary_bond_strength = (); %nobreak_lhs = (); %nobreak_rhs = (); # The hash keys in this section are token types, plus the text of # certain keywords like 'or', 'and'. # no break around possible filehandle $left_bond_strength{'Z'} = NO_BREAK; $right_bond_strength{'Z'} = NO_BREAK; # never put a bare word on a new line: # example print (STDERR, "bla"); will fail with break after ( $left_bond_strength{'w'} = NO_BREAK; # blanks always have infinite strength to force breaks after # real tokens $right_bond_strength{'b'} = NO_BREAK; # try not to break on exponentiation @q = qw# ** .. ... <=> #; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = (STRONG) x scalar(@q); # The comma-arrow has very low precedence but not a good break point $left_bond_strength{'=>'} = NO_BREAK; $right_bond_strength{'=>'} = NOMINAL; # ok to break after label $left_bond_strength{'J'} = NO_BREAK; $right_bond_strength{'J'} = NOMINAL; $left_bond_strength{'j'} = STRONG; $right_bond_strength{'j'} = STRONG; $left_bond_strength{'A'} = STRONG; $right_bond_strength{'A'} = STRONG; $left_bond_strength{'->'} = STRONG; $right_bond_strength{'->'} = VERY_STRONG; $left_bond_strength{'CORE::'} = NOMINAL; $right_bond_strength{'CORE::'} = NO_BREAK; # Fix for c250: added strengths for new type 'P' # Note: these are working okay, but may eventually need to be # adjusted or even removed. $left_bond_strength{'P'} = NOMINAL; $right_bond_strength{'P'} = NOMINAL; # breaking AFTER modulus operator is ok: @q = qw< % >; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q); # Break AFTER math operators * and / @q = qw< * / x >; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = (NOMINAL) x scalar(@q); # Break AFTER weakest math operators + and - # Make them weaker than * but a bit stronger than '.' @q = qw< + - >; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q); # Define left strength of unary plus and minus (fixes case b511) $left_bond_strength{p} = $left_bond_strength{'+'}; $left_bond_strength{m} = $left_bond_strength{'-'}; # And make right strength of unary plus and minus very high. # Fixes cases b670 b790 $right_bond_strength{p} = NO_BREAK; $right_bond_strength{m} = NO_BREAK; # breaking BEFORE these is just ok: @q = qw# >> << #; @right_bond_strength{@q} = (STRONG) x scalar(@q); @left_bond_strength{@q} = (NOMINAL) x scalar(@q); # breaking before the string concatenation operator seems best # because it can be hard to see at the end of a line $right_bond_strength{'.'} = STRONG; $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; @q = qw< } ] ) R >; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = (NOMINAL) x scalar(@q); # make these a little weaker than nominal so that they get # favored for end-of-line characters @q = qw< != == =~ !~ ~~ !~~ >; @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q); # break AFTER these @q = qw# < > | & >= <= #; @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q); @right_bond_strength{@q} = ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q); # breaking either before or after a quote is ok # but bias for breaking before a quote $left_bond_strength{'Q'} = NOMINAL; $right_bond_strength{'Q'} = NOMINAL + 0.02; $left_bond_strength{'q'} = NOMINAL; $right_bond_strength{'q'} = NOMINAL; # starting a line with a keyword is usually ok $left_bond_strength{'k'} = NOMINAL; # we usually want to bond a keyword strongly to what immediately # follows, rather than leaving it stranded at the end of a line $right_bond_strength{'k'} = STRONG; $left_bond_strength{'G'} = NOMINAL; $right_bond_strength{'G'} = STRONG; # assignment operators @q = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= ); # Default is to break AFTER various assignment operators @left_bond_strength{@q} = (STRONG) x scalar(@q); @right_bond_strength{@q} = ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q); # Default is to break BEFORE '&&' and '||' and '//' # set strength of '||' to same as '=' so that chains like # $a = $b || $c || $d will break before the first '||' $right_bond_strength{'||'} = NOMINAL; $left_bond_strength{'||'} = $right_bond_strength{'='}; # same thing for '//' $right_bond_strength{'//'} = NOMINAL; $left_bond_strength{'//'} = $right_bond_strength{'='}; # set strength of && a little higher than || $right_bond_strength{'&&'} = NOMINAL; $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; $left_bond_strength{';'} = VERY_STRONG; $right_bond_strength{';'} = VERY_WEAK; $left_bond_strength{'f'} = VERY_STRONG; # make right strength of for ';' a little less than '=' # to make for contents break after the ';' to avoid this: # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += # $number_of_fields ) # and make it weaker than ',' and 'and' too $right_bond_strength{'f'} = VERY_WEAK - 0.03; # The strengths of ?/: should be somewhere between # an '=' and a quote (NOMINAL), # make strength of ':' slightly less than '?' to help # break long chains of ? : after the colons $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; $right_bond_strength{':'} = NO_BREAK; $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; $right_bond_strength{'?'} = NO_BREAK; $left_bond_strength{','} = VERY_STRONG; $right_bond_strength{','} = VERY_WEAK; # remaining digraphs and trigraphs not defined above @q = qw( :: <> ++ --); @left_bond_strength{@q} = (WEAK) x scalar(@q); @right_bond_strength{@q} = (STRONG) x scalar(@q); # Set bond strengths of certain keywords # make 'or', 'err', 'and' slightly weaker than a ',' $left_bond_strength{'and'} = VERY_WEAK - 0.01; $left_bond_strength{'or'} = VERY_WEAK - 0.02; $left_bond_strength{'err'} = VERY_WEAK - 0.02; $left_bond_strength{'xor'} = VERY_WEAK - 0.01; @q = qw(ne eq); @left_bond_strength{@q} = (NOMINAL) x scalar(@q); @q = qw(lt gt le ge); @left_bond_strength{@q} = ( 0.9 * NOMINAL + 0.1 * STRONG ) x scalar(@q); @q = qw(and or err xor ne eq); @right_bond_strength{@q} = (NOMINAL) x scalar(@q); #--------------------------------------------------------------- # Bond Strength BEGIN Section 2. # Set binary rules for bond strengths between certain token types. #--------------------------------------------------------------- # We have a little problem making tables which apply to the # container tokens. Here is a list of container tokens and # their types: # # type tokens // meaning # { {, [, ( // indent # } }, ], ) // outdent # [ [ // left non-structural [ (enclosing an array index) # ] ] // right non-structural square bracket # ( ( // left non-structural paren # ) ) // right non-structural paren # L { // left non-structural curly brace (enclosing a key) # R } // right non-structural curly brace # # Some rules apply to token types and some to just the token # itself. We solve the problem by combining type and token into a # new hash key for the container types. # # If a rule applies to a token 'type' then we need to make rules # for each of these 'type.token' combinations: # Type Type.Token # { {{, {[, {( # [ [[ # ( (( # L L{ # } }}, }], }) # ] ]] # ) )) # R R} # # If a rule applies to a token then we need to make rules for # these 'type.token' combinations: # Token Type.Token # { {{, L{ # [ {[, [[ # ( {(, (( # } }}, R} # ] }], ]] # ) }), )) # allow long lines before final { in an if statement, as in: # if (.......... # ..........) # { # # Otherwise, the line before the { tends to be too short. $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03; $binary_bond_strength{'(('}{'{{'} = NOMINAL; # break on something like '} (', but keep this stronger than a ',' # example is in 'howe.pl' $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK; # keep matrix and hash indices together # but make them a little below STRONG to allow breaking open # something like {'some-word'}{'some-very-long-word'} at the }{ # (bracebrk.t) $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL; $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL; # increase strength to the point where a break in the following # will be after the opening paren rather than at the arrow: # $a->$b($c); $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG; # Added for c140 to make 'w ->' and 'i ->' behave the same $binary_bond_strength{'w'}{'->'} = 1.45 * STRONG; # Note that the following alternative strength would make the break at the # '->' rather than opening the '('. Both have advantages and disadvantages. # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; # $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL; $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL; $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL; #--------------------------------------------------------------- # Binary NO_BREAK rules #--------------------------------------------------------------- # use strict requires that bare word and => not be separated $binary_bond_strength{'C'}{'=>'} = NO_BREAK; $binary_bond_strength{'U'}{'=>'} = NO_BREAK; # Never break between a bareword and a following paren because # perl may give an error. For example, if a break is placed # between 'to_filehandle' and its '(' the following line will # give a syntax error [Carp.pm]: my( $no) =fileno( # to_filehandle( $in)) ; $binary_bond_strength{'C'}{'(('} = NO_BREAK; $binary_bond_strength{'C'}{'{('} = NO_BREAK; $binary_bond_strength{'U'}{'(('} = NO_BREAK; $binary_bond_strength{'U'}{'{('} = NO_BREAK; # use strict requires that bare word within braces not start new # line $binary_bond_strength{'L{'}{'w'} = NO_BREAK; $binary_bond_strength{'w'}{'R}'} = NO_BREAK; # The following two rules prevent a syntax error caused by breaking up # a construction like '{-y}'. The '-' quotes the 'y' and prevents # it from being taken as a transliteration. We have to keep # token types 'L m w' together to prevent this error. $binary_bond_strength{'L{'}{'m'} = NO_BREAK; $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK; # keep 'bareword-' together, but only if there is no space between # the word and dash. Do not keep together if there is a space. # example 'use perl6-alpha' $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK; # use strict requires that bare word and => not be separated $binary_bond_strength{'w'}{'=>'} = NO_BREAK; # use strict does not allow separating type info from trailing { } # testfile is readmail.pl $binary_bond_strength{'t'}{'L{'} = NO_BREAK; $binary_bond_strength{'i'}{'L{'} = NO_BREAK; # Fix for c250: set strength for new 'S' to be same as 'i' # testfile is test11/Hub.pm $binary_bond_strength{'S'}{'L{'} = NO_BREAK; # As a defensive measure, do not break between a '(' and a # filehandle. In some cases, this can cause an error. For # example, the following program works: # my $msg="hi!\n"; # print # ( STDOUT # $msg # ); # # But this program fails: # my $msg="hi!\n"; # print # ( # STDOUT # $msg # ); # # This is normally only a problem with the 'extrude' option $binary_bond_strength{'(('}{'Y'} = NO_BREAK; $binary_bond_strength{'{('}{'Y'} = NO_BREAK; # never break between sub name and opening paren $binary_bond_strength{'w'}{'(('} = NO_BREAK; $binary_bond_strength{'w'}{'{('} = NO_BREAK; # keep '}' together with ';' $binary_bond_strength{'}}'}{';'} = NO_BREAK; # Breaking before a ++ can cause perl to guess wrong. For # example the following line will cause a syntax error # with -extrude if we break between '$i' and '++' [fixstyle2] # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); $nobreak_lhs{'++'} = NO_BREAK; # Do not break before a possible file handle $nobreak_lhs{'Z'} = NO_BREAK; # use strict hates bare words on any new line. For # example, a break before the underscore here provokes the # wrath of use strict: # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { $nobreak_rhs{'F'} = NO_BREAK; $nobreak_rhs{'CORE::'} = NO_BREAK; # To prevent the tokenizer from switching between types 'w' and 'G' we # need to avoid breaking between type 'G' and the following code block # brace. Fixes case b929. $nobreak_rhs{G} = NO_BREAK; #--------------------------------------------------------------- # Bond Strength BEGIN Section 3. # Define tables and values for applying a small bias to the above # values. #--------------------------------------------------------------- # Adding a small 'bias' to strengths is a simple way to make a line # break at the first of a sequence of identical terms. For # example, to force long string of conditional operators to break # with each line ending in a ':', we can add a small number to the # bond strength of each ':' (colon.t) @bias_tokens = qw( : && || f and or . ); # tokens which get bias %bias_hash = map { $_ => 0 } @bias_tokens; $delta_bias = 0.0001; # a very small strength level return; } ## end sub initialize_bond_strength_hashes use constant DEBUG_BOND => 0; sub set_bond_strengths { my ($self) = @_; #----------------------------------------------------------------- # Define a 'bond strength' for each token pair in an output batch. # See comments above for definition of bond strength. #----------------------------------------------------------------- my $rbond_strength_to_go = []; my $rLL = $self->[_rLL_]; my $rK_weld_right = $self->[_rK_weld_right_]; my $rK_weld_left = $self->[_rK_weld_left_]; my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; # patch-its always ok to break at end of line $nobreak_to_go[$max_index_to_go] = 0; # we start a new set of bias values for each line %bias = %bias_hash; my $code_bias = -.01; # bias for closing block braces my $type = 'b'; my $token = SPACE; my $token_length = 1; my $last_type; my $last_nonblank_type = $type; my $last_nonblank_token = $token; my $list_str = $left_bond_strength{'?'}; my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 ); my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, ); # main loop to compute bond strengths between each pair of tokens foreach my $i ( 0 .. $max_index_to_go ) { $last_type = $type; if ( $type ne 'b' ) { $last_nonblank_type = $type; $last_nonblank_token = $token; } $type = $types_to_go[$i]; # strength on both sides of a blank is the same if ( $type eq 'b' && $last_type ne 'b' ) { $rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ]; $nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257 next; } $token = $tokens_to_go[$i]; $token_length = $token_lengths_to_go[$i]; $block_type = $block_type_to_go[$i]; $i_next = $i + 1; $next_type = $types_to_go[$i_next]; $next_token = $tokens_to_go[$i_next]; $total_nesting_depth = $nesting_depth_to_go[$i_next]; $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; my $seqno = $type_sequence_to_go[$i]; my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank]; # We are computing the strength of the bond between the current # token and the NEXT token. #--------------------------------------------------------------- # Bond Strength Section 1: # First Approximation. # Use minimum of individual left and right tabulated bond # strengths. #--------------------------------------------------------------- my $bsr = $right_bond_strength{$type}; my $bsl = $left_bond_strength{$next_nonblank_type}; # define right bond strengths of certain keywords if ( $type eq 'k' ) { if ( defined( $right_bond_strength{$token} ) ) { $bsr = $right_bond_strength{$token}; } } # set terminal bond strength to the nominal value # this will cause good preceding breaks to be retained if ( $i_next_nonblank > $max_index_to_go ) { $bsl = NOMINAL; # But weaken the bond at a 'missing terminal comma'. If an # optional comma is missing at the end of a broken list, use # the strength of a comma anyway to make formatting the same as # if it were there. Fixes issue c133. if ( !defined($bsr) || $bsr > VERY_WEAK ) { my $seqno_px = $parent_seqno_to_go[$max_index_to_go]; if ( $ris_list_by_seqno->{$seqno_px} ) { my $KK = $K_to_go[$max_index_to_go]; my $Kn = $self->K_next_nonblank($KK); my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_]; if ( $seqno_n && $seqno_n eq $seqno_px ) { $bsl = VERY_WEAK; } } } } # define left bond strengths of certain keywords if ( $next_nonblank_type eq 'k' ) { if ( defined( $left_bond_strength{$next_nonblank_token} ) ) { $bsl = $left_bond_strength{$next_nonblank_token}; } } # Use the minimum of the left and right strengths. Note: it might # seem that we would want to keep a NO_BREAK if either token has # this value. This didn't work, for example because in an arrow # list, it prevents the comma from separating from the following # bare word (which is probably quoted by its arrow). So necessary # NO_BREAK's have to be handled as special cases in the final # section. if ( !defined($bsr) ) { $bsr = VERY_STRONG } if ( !defined($bsl) ) { $bsl = VERY_STRONG } my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl; $bond_str_1 = $bond_str if (DEBUG_BOND); #--------------------------------------------------------------- # Bond Strength Section 2: # Apply hardwired rules.. #--------------------------------------------------------------- # Patch to put terminal or clauses on a new line: Weaken the bond # at an || followed by die or similar keyword to make the terminal # or clause fall on a new line, like this: # # my $class = shift # || die "Cannot add broadcast: No class identifier found"; # # Otherwise the break will be at the previous '=' since the || and # = have the same starting strength and the or is biased, like # this: # # my $class = # shift || die "Cannot add broadcast: No class identifier found"; # # In any case if the user places a break at either the = or the || # it should remain there. if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) { # /^(die|confess|croak|warn)$/ if ( $is_die_confess_croak_warn{$next_nonblank_token} ) { if ( $want_break_before{$token} && $i > 0 ) { $rbond_strength_to_go->[ $i - 1 ] -= $delta_bias; # keep bond strength of a token and its following blank # the same if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) { $rbond_strength_to_go->[ $i - 2 ] -= $delta_bias; } } else { $bond_str -= $delta_bias; } } } # good to break after end of code blocks if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) { $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; $code_bias += $delta_bias; } if ( $type eq 'k' ) { # allow certain control keywords to stand out if ( $next_nonblank_type eq 'k' && $is_last_next_redo_return{$token} ) { $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; } # Don't break after keyword my. This is a quick fix for a # rare problem with perl. An example is this line from file # Container.pm: # foreach my $question( Debian::DebConf::ConfigDb::gettree( # $this->{'question'} ) ) if ( $token eq 'my' ) { $bond_str = NO_BREAK; } } if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) { if ( $is_keyword_returning_list{$next_nonblank_token} ) { $bond_str = $list_str if ( $bond_str > $list_str ); } # keywords like 'unless', 'if', etc, within statements # make good breaks if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { $bond_str = VERY_WEAK / 1.05; } } # try not to break before a comma-arrow elsif ( $next_nonblank_type eq '=>' ) { if ( $bond_str < STRONG ) { $bond_str = STRONG } } else { ## ok - not special } #--------------------------------------------------------------- # Additional hardwired NOBREAK rules #--------------------------------------------------------------- # map1.t -- correct for a quirk in perl if ( $token eq '(' && $next_nonblank_type eq 'i' && $last_nonblank_type eq 'k' && $is_sort_map_grep{$last_nonblank_token} ) # /^(sort|map|grep)$/ ) { $bond_str = NO_BREAK; } # extrude.t: do not break before paren at: # -l pid_filename( if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { $bond_str = NO_BREAK; } # OLD COMMENT: In older version of perl, use strict can cause # problems with breaks before bare words following opening parens. # For example, this will fail under older versions if a break is # made between '(' and 'MAIL': # use strict; open( MAIL, "a long filename or command"); close MAIL; # NEW COMMENT: Third fix for b1213: # This option does not seem to be needed any longer, and it can # cause instabilities. It can be turned off, but to minimize # changes to existing formatting it is retained only in the case # where the previous token was 'open' and there was no line break. # Even this could eventually be removed if it causes instability. if ( $type eq '{' ) { if ( $token eq '(' && $next_nonblank_type eq 'w' && $last_nonblank_type eq 'k' && $last_nonblank_token eq 'open' && !$old_breakpoint_to_go[$i] ) { $bond_str = NO_BREAK; } } # Do not break between a possible filehandle and a ? or / and do # not introduce a break after it if there is no blank # (extrude.t) elsif ( $type eq 'Z' ) { # don't break.. if ( # if there is no blank and we do not want one. Examples: # print $x++ # do not break after $x # print HTML"HELLO" # break ok after HTML ( $next_type ne 'b' && defined( $want_left_space{$next_type} ) && $want_left_space{$next_type} == WS_NO ) # or we might be followed by the start of a quote, # and this is not an existing breakpoint; fixes c039. || !$old_breakpoint_to_go[$i] && substr( $next_nonblank_token, 0, 1 ) eq '/' ) { $bond_str = NO_BREAK; } } else { ## ok - not special } # Breaking before a ? before a quote can cause trouble if # they are not separated by a blank. # Example: a syntax error occurs if you break before the ? here # my$logic=join$all?' && ':' || ',@regexps; # From: Professional_Perl_Programming_Code/multifind.pl if ( $next_nonblank_type eq '?' ) { $bond_str = NO_BREAK if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); } # Breaking before a . followed by a number # can cause trouble if there is no intervening space # Example: a syntax error occurs if you break before the .2 here # $str .= pack($endian.2, ensurrogate($ord)); # From: perl58/Unicode.pm elsif ( $next_nonblank_type eq '.' ) { $bond_str = NO_BREAK if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); } # Fix for c039 elsif ( $type eq 'w' ) { $bond_str = NO_BREAK if ( !$old_breakpoint_to_go[$i] && substr( $next_nonblank_token, 0, 1 ) eq '/' && $next_nonblank_type ne '//' ); } else { ## ok - not special } $bond_str_2 = $bond_str if (DEBUG_BOND); #--------------------------------------------------------------- # End of hardwired rules #--------------------------------------------------------------- #--------------------------------------------------------------- # Bond Strength Section 3: # Apply table rules. These have priority over the above # hardwired rules. #--------------------------------------------------------------- my $tabulated_bond_str; my $ltype = $type; my $rtype = $next_nonblank_type; if ( $seqno && $is_container_token{$token} ) { $ltype = $type . $token; } if ( $next_nonblank_seqno && $is_container_token{$next_nonblank_token} ) { $rtype = $next_nonblank_type . $next_nonblank_token; # Alternate Fix #1 for issue b1299. This version makes the # decision as soon as possible. See Alternate Fix #2 also. # Do not separate a bareword identifier from its paren: b1299 # This is currently needed for stability because if the bareword # gets separated from a preceding '->' and following '(' then # the tokenizer may switch from type 'i' to type 'w'. This # patch will prevent this by keeping it adjacent to its '('. ## if ( $next_nonblank_token eq '(' ## && $ltype eq 'i' ## && substr( $token, 0, 1 ) =~ /^\w$/ ) ## { ## $ltype = 'w'; ## } } # apply binary rules which apply regardless of space between tokens if ( $binary_bond_strength{$ltype}{$rtype} ) { $bond_str = $binary_bond_strength{$ltype}{$rtype}; $tabulated_bond_str = $bond_str; } # apply binary rules which apply only if no space between tokens if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) { $bond_str = $binary_bond_strength{$ltype}{$next_type}; $tabulated_bond_str = $bond_str; } if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) { $bond_str = NO_BREAK; $tabulated_bond_str = $bond_str; } $bond_str_3 = $bond_str if (DEBUG_BOND); # If the hardwired rules conflict with the tabulated bond # strength then there is an inconsistency that should be fixed DEBUG_BOND && $tabulated_bond_str && $bond_str_1 && $bond_str_1 != $bond_str_2 && $bond_str_2 != $tabulated_bond_str && do { print {*STDOUT} "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n"; }; #----------------------------------------------------------------- # Bond Strength Section 4: # Modify strengths of certain tokens which often occur in sequence # by adding a small bias to each one in turn so that the breaks # occur from left to right. # # Note that we only changing strengths by small amounts here, # and usually increasing, so we should not be altering any NO_BREAKs. # Other routines which check for NO_BREAKs will use a tolerance # of one to avoid any problem. #----------------------------------------------------------------- # The bias tables use special keys: # $type - if not keyword # $token - if keyword, but map some keywords together my $left_key = $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type; my $right_key = $next_nonblank_type eq 'k' ? $next_nonblank_token eq 'err' ? 'or' : $next_nonblank_token : $next_nonblank_type; # bias left token if ( defined( $bias{$left_key} ) ) { if ( !$want_break_before{$left_key} ) { $bias{$left_key} += $delta_bias; $bond_str += $bias{$left_key}; } } # bias right token if ( defined( $bias{$right_key} ) ) { if ( $want_break_before{$right_key} ) { # for leading '.' align all but 'short' quotes; the idea # is to not place something like "\n" on a single line. if ( $right_key eq '.' ) { my $is_short_quote = $last_nonblank_type eq '.' && ( $token_length <= $rOpts_short_concatenation_item_length ) && !$is_closing_token{$token}; if ( !$is_short_quote ) { $bias{$right_key} += $delta_bias; } } else { $bias{$right_key} += $delta_bias; } $bond_str += $bias{$right_key}; } } $bond_str_4 = $bond_str if (DEBUG_BOND); #--------------------------------------------------------------- # Bond Strength Section 5: # Fifth Approximation. # Take nesting depth into account by adding the nesting depth # to the bond strength. #--------------------------------------------------------------- my $strength; if ( defined($bond_str) && !$nobreak_to_go[$i] ) { if ( $total_nesting_depth > 0 ) { $strength = $bond_str + $total_nesting_depth; } else { $strength = $bond_str; } } else { $strength = NO_BREAK; # For critical code such as lines with here targets we must # be absolutely sure that we do not allow a break. So for # these the nobreak flag exceeds 1 as a signal. Otherwise we # can run into trouble when small tolerances are added. $strength += 1 if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 ); } #--------------------------------------------------------------- # Bond Strength Section 6: # Sixth Approximation. Welds. #--------------------------------------------------------------- # Do not allow a break within welds if ( $total_weld_count && $seqno ) { my $KK = $K_to_go[$i]; if ( $rK_weld_right->{$KK} ) { $strength = NO_BREAK; } # But encourage breaking after opening welded tokens elsif ($rK_weld_left->{$KK} && $is_opening_token{$token} ) { $strength -= 1; } else { ## ok - not welded left or right } } # always break after side comment if ( $type eq '#' ) { $strength = 0 } $rbond_strength_to_go->[$i] = $strength; # Fix for case c001: be sure NO_BREAK's are enforced by later # routines, except at a '?' because '?' as quote delimiter is # deprecated. if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) { $nobreak_to_go[$i] ||= 1; } DEBUG_BOND && do { my $str = substr( $token, 0, 15 ); $str .= SPACE x ( 16 - length($str) ); print {*STDOUT} "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n"; # reset for next pass $bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef; }; } ## end main loop return $rbond_strength_to_go; } ## end sub set_bond_strengths } ## end closure set_bond_strengths sub bad_pattern { my ($pattern) = @_; # See if a pattern will compile. # Note: this sub is also called from Tokenizer my $regex = eval { qr/$pattern/ }; return $EVAL_ERROR; } { ## begin closure prepare_cuddled_block_types my %no_cuddle; # Add keywords here which really should not be cuddled BEGIN { my @q = qw(if unless for foreach while); @no_cuddle{@q} = (1) x scalar(@q); } sub prepare_cuddled_block_types { # the cuddled-else style, if used, is controlled by a hash that # we construct here # Include keywords here which should not be cuddled my $cuddled_string = EMPTY_STRING; if ( $rOpts->{'cuddled-else'} ) { # set the default $cuddled_string = 'elsif else continue catch finally' unless ( $rOpts->{'cuddled-block-list-exclusive'} ); # This is the old equivalent but more complex version # $cuddled_string = 'if-elsif-else unless-elsif-else -continue '; # Add users other blocks to be cuddled my $cuddled_block_list = $rOpts->{'cuddled-block-list'}; if ($cuddled_block_list) { $cuddled_string .= SPACE . $cuddled_block_list; } } # If we have a cuddled string of the form # 'try-catch-finally' # we want to prepare a hash of the form # $rcuddled_block_types = { # 'try' => { # 'catch' => 1, # 'finally' => 1 # }, # }; # use -dcbl to dump this hash # Multiple such strings are input as a space or comma separated list # If we get two lists with the same leading type, such as # -cbl = "-try-catch-finally -try-catch-otherwise" # then they will get merged as follows: # $rcuddled_block_types = { # 'try' => { # 'catch' => 1, # 'finally' => 2, # 'otherwise' => 1, # }, # }; # This will allow either type of chain to be followed. $cuddled_string =~ s/,/ /g; # allow space or comma separated lists my @cuddled_strings = split /\s+/, $cuddled_string; $rcuddled_block_types = {}; # process each dash-separated string... my $string_count = 0; foreach my $string (@cuddled_strings) { next unless $string; my @words = split /-+/, $string; # allow multiple dashes # we could look for and report possible errors here... next if ( @words <= 0 ); # allow either '-continue' or *-continue' for arbitrary starting type my $start = '*'; # a single word without dashes is a secondary block type if ( @words > 1 ) { $start = shift @words; } # always make an entry for the leading word. If none follow, this # will still prevent a wildcard from matching this word. if ( !defined( $rcuddled_block_types->{$start} ) ) { $rcuddled_block_types->{$start} = {}; } # The count gives the original word order in case we ever want it. $string_count++; my $word_count = 0; foreach my $word (@words) { next unless $word; if ( $no_cuddle{$word} ) { Warn( "## Ignoring keyword '$word' in -cbl; does not seem right\n" ); next; } $word_count++; $rcuddled_block_types->{$start}->{$word} = 1; #"$string_count.$word_count"; # git#9: Remove this word from the list of desired one-line # blocks $want_one_line_block{$word} = 0; } } return; } ## end sub prepare_cuddled_block_types } ## end closure prepare_cuddled_block_types sub dump_cuddled_block_list { my ($fh) = @_; # ORIGINAL METHOD: Here is the format of the cuddled block type hash # which controls this routine # my $rcuddled_block_types = { # 'if' => { # 'else' => 1, # 'elsif' => 1 # }, # 'try' => { # 'catch' => 1, # 'finally' => 1 # }, # }; # SIMPLIFIED METHOD: the simplified method uses a wildcard for # the starting block type and puts all cuddled blocks together: # my $rcuddled_block_types = { # '*' => { # 'else' => 1, # 'elsif' => 1 # 'catch' => 1, # 'finally' => 1 # }, # }; # Both methods work, but the simplified method has proven to be adequate and # easier to manage. my $cuddled_string = $rOpts->{'cuddled-block-list'}; $cuddled_string = EMPTY_STRING unless $cuddled_string; my $flags = EMPTY_STRING; $flags .= "-ce" if ( $rOpts->{'cuddled-else'} ); $flags .= " -cbl='$cuddled_string'"; if ( !$rOpts->{'cuddled-else'} ) { $flags .= "\nNote: You must specify -ce to generate a cuddled hash"; } $fh->print(<<EOM); ------------------------------------------------------------------------ Hash of cuddled block types prepared for a run with these parameters: $flags ------------------------------------------------------------------------ EOM use Data::Dumper; $fh->print( Dumper($rcuddled_block_types) ); $fh->print(<<EOM); ------------------------------------------------------------------------ EOM return; } ## end sub dump_cuddled_block_list sub make_static_block_comment_pattern { # create the pattern used to identify static block comments $static_block_comment_pattern = '^\s*##'; # allow the user to change it if ( $rOpts->{'static-block-comment-prefix'} ) { my $prefix = $rOpts->{'static-block-comment-prefix'}; $prefix =~ s/^\s*//; my $pattern = $prefix; # user may give leading caret to force matching left comments only if ( $prefix !~ /^\^#/ ) { if ( $prefix !~ /^#/ ) { Die( "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n" ); } $pattern = '^\s*' . $prefix; } if ( bad_pattern($pattern) ) { Die( "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n" ); } $static_block_comment_pattern = $pattern; } return; } ## end sub make_static_block_comment_pattern sub make_format_skipping_pattern { my ( $opt_name, $default ) = @_; my $param = $rOpts->{$opt_name}; if ( !$param ) { $param = $default } $param =~ s/^\s*//; if ( $param !~ /^#/ ) { Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n"); } my $pattern = '^' . $param . '\s'; if ( bad_pattern($pattern) ) { Die( "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n" ); } return $pattern; } ## end sub make_format_skipping_pattern sub make_non_indenting_brace_pattern { # Create the pattern used to identify static side comments. # Note that we are ending the pattern in a \s. This will allow # the pattern to be followed by a space and some text, or a newline. # The pattern is used in sub 'non_indenting_braces' $non_indenting_brace_pattern = '^#<<<\s'; # allow the user to change it if ( $rOpts->{'non-indenting-brace-prefix'} ) { my $prefix = $rOpts->{'non-indenting-brace-prefix'}; $prefix =~ s/^\s*//; if ( $prefix !~ /^#/ ) { Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n"); } my $pattern = '^' . $prefix . '\s'; if ( bad_pattern($pattern) ) { Die( "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n" ); } $non_indenting_brace_pattern = $pattern; } return; } ## end sub make_non_indenting_brace_pattern sub make_closing_side_comment_list_pattern { # turn any input list into a regex for recognizing selected block types $closing_side_comment_list_pattern = '^\w+'; if ( defined( $rOpts->{'closing-side-comment-list'} ) && $rOpts->{'closing-side-comment-list'} ) { $closing_side_comment_list_pattern = make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} ); } return; } ## end sub make_closing_side_comment_list_pattern sub make_sub_matching_pattern { # Patterns for standardizing matches to block types for regular subs and # anonymous subs. Examples # 'sub process' is a named sub # 'sub ::m' is a named sub # 'sub' is an anonymous sub # 'sub:' is a label, not a sub # 'sub :' is a label, not a sub ( block type will be <sub:> ) # sub'_ is a named sub ( block type will be <sub '_> ) # 'substr' is a keyword # So note that named subs always have a space after 'sub' $SUB_PATTERN = '^sub\s'; # match normal sub $ASUB_PATTERN = '^sub$'; # match anonymous sub %matches_ASUB = ( 'sub' => 1 ); # Fix the patterns to include any sub aliases: # Note that any 'sub-alias-list' has been preprocessed to # be a trimmed, space-separated list which includes 'sub' # for example, it might be 'sub method fun' my @words; my $sub_alias_list = $rOpts->{'sub-alias-list'}; if ($sub_alias_list) { @words = split /\s+/, $sub_alias_list; } else { push @words, 'sub'; } # add 'method' unless use-feature='noclass' is set. if ( !defined( $rOpts->{'use-feature'} ) || $rOpts->{'use-feature'} !~ /\bnoclass\b/ ) { push @words, 'method'; } # Note (see also RT #133130): These patterns are used by # sub make_block_pattern, which is used for making most patterns. # So this sub needs to be called before other pattern-making routines. if ( @words > 1 ) { # Two ways are provided to match an anonymous sub: # $ASUB_PATTERN - with a regex (old method, slow) # %matches_ASUB - with a hash lookup (new method, faster) @matches_ASUB{@words} = (1) x scalar(@words); my $alias_list = join '|', keys %matches_ASUB; $SUB_PATTERN =~ s/sub/\($alias_list\)/; $ASUB_PATTERN =~ s/sub/\($alias_list\)/; } return; } ## end sub make_sub_matching_pattern sub make_bl_pattern { # Set defaults lists to retain historical default behavior for -bl: my $bl_list_string = '*'; my $bl_exclusion_list_string = 'sort map grep eval asub'; if ( defined( $rOpts->{'brace-left-list'} ) && $rOpts->{'brace-left-list'} ) { $bl_list_string = $rOpts->{'brace-left-list'}; } if ( $bl_list_string =~ /\bsub\b/ ) { $rOpts->{'opening-sub-brace-on-new-line'} ||= $rOpts->{'opening-brace-on-new-line'}; } if ( $bl_list_string =~ /\basub\b/ ) { $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||= $rOpts->{'opening-brace-on-new-line'}; } $bl_pattern = make_block_pattern( '-bll', $bl_list_string ); # for -bl, a list with '*' turns on -sbl and -asbl if ( $bl_pattern =~ /\.\*/ ) { $rOpts->{'opening-sub-brace-on-new-line'} ||= $rOpts->{'opening-brace-on-new-line'}; $rOpts->{'opening-anonymous-sub-brace-on-new-line'} ||= $rOpts->{'opening-anonymous-brace-on-new-line'}; } if ( defined( $rOpts->{'brace-left-exclusion-list'} ) && $rOpts->{'brace-left-exclusion-list'} ) { $bl_exclusion_list_string = $rOpts->{'brace-left-exclusion-list'}; if ( $bl_exclusion_list_string =~ /\bsub\b/ ) { $rOpts->{'opening-sub-brace-on-new-line'} = 0; } if ( $bl_exclusion_list_string =~ /\basub\b/ ) { $rOpts->{'opening-anonymous-sub-brace-on-new-line'} = 0; } } $bl_exclusion_pattern = make_block_pattern( '-blxl', $bl_exclusion_list_string ); return; } ## end sub make_bl_pattern sub make_bli_pattern { # default list of block types for which -bli would apply my $bli_list_string = 'if else elsif unless while for foreach do : sub'; my $bli_exclusion_list_string = SPACE; if ( defined( $rOpts->{'brace-left-and-indent-list'} ) && $rOpts->{'brace-left-and-indent-list'} ) { $bli_list_string = $rOpts->{'brace-left-and-indent-list'}; } $bli_pattern = make_block_pattern( '-blil', $bli_list_string ); if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} ) && $rOpts->{'brace-left-and-indent-exclusion-list'} ) { $bli_exclusion_list_string = $rOpts->{'brace-left-and-indent-exclusion-list'}; } $bli_exclusion_pattern = make_block_pattern( '-blixl', $bli_exclusion_list_string ); return; } ## end sub make_bli_pattern sub make_keyword_group_list_pattern { # turn any input list into a regex for recognizing selected block types. # Here are the defaults: $keyword_group_list_pattern = '^(our|local|my|use|require|)$'; $keyword_group_list_comment_pattern = EMPTY_STRING; if ( defined( $rOpts->{'keyword-group-blanks-list'} ) && $rOpts->{'keyword-group-blanks-list'} ) { my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'}; my @keyword_list; my @comment_list; foreach my $word (@words) { if ( $word eq 'BC' || $word eq 'SBC' ) { push @comment_list, $word; if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' } } else { push @keyword_list, $word; } } $keyword_group_list_pattern = make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} ); $keyword_group_list_comment_pattern = make_block_pattern( '-kgbl', join( SPACE, @comment_list ) ); } return; } ## end sub make_keyword_group_list_pattern sub make_block_brace_vertical_tightness_pattern { # turn any input list into a regex for recognizing selected block types $block_brace_vertical_tightness_pattern = '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} ) && $rOpts->{'block-brace-vertical-tightness-list'} ) { $block_brace_vertical_tightness_pattern = make_block_pattern( '-bbvtl', $rOpts->{'block-brace-vertical-tightness-list'} ); } return; } ## end sub make_block_brace_vertical_tightness_pattern sub make_blank_line_pattern { $blank_lines_before_closing_block_pattern = $SUB_PATTERN; my $key = 'blank-lines-before-closing-block-list'; if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { $blank_lines_before_closing_block_pattern = make_block_pattern( '-blbcl', $rOpts->{$key} ); } $blank_lines_after_opening_block_pattern = $SUB_PATTERN; $key = 'blank-lines-after-opening-block-list'; if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) { $blank_lines_after_opening_block_pattern = make_block_pattern( '-blaol', $rOpts->{$key} ); } return; } ## end sub make_blank_line_pattern sub make_block_pattern { # given a string of block-type keywords, return a regex to match them # The only tricky part is that labels are indicated with a single ':' # and the 'sub' token text may have additional text after it (name of # sub). # # Example: # # input string: "if else elsif unless while for foreach do : sub"; # pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'; # Minor Update: # # To distinguish between anonymous subs and named subs, use 'sub' to # indicate a named sub, and 'asub' to indicate an anonymous sub my ( $abbrev, $string ) = @_; my @list = split_words($string); my @words = (); my %seen; for my $i (@list) { if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern } next if $seen{$i}; $seen{$i} = 1; if ( $i eq 'sub' ) { } elsif ( $i eq 'asub' ) { } elsif ( $i eq ';' ) { push @words, ';'; } elsif ( $i eq '{' ) { push @words, '\{'; } elsif ( $i eq ':' ) { push @words, '\w+:'; } elsif ( $i =~ /^\w/ ) { push @words, $i; } else { Warn("unrecognized block type $i after $abbrev, ignoring\n"); } } # Fix 2 for c091, prevent the pattern from matching an empty string # '1 ' is an impossible block name. if ( !@words ) { push @words, "1 " } my $pattern = '(' . join( '|', @words ) . ')$'; my $sub_patterns = EMPTY_STRING; if ( $seen{'sub'} ) { $sub_patterns .= '|' . $SUB_PATTERN; } if ( $seen{'asub'} ) { $sub_patterns .= '|' . $ASUB_PATTERN; } if ($sub_patterns) { $pattern = '(' . $pattern . $sub_patterns . ')'; } $pattern = '^' . $pattern; return $pattern; } ## end sub make_block_pattern sub make_static_side_comment_pattern { # create the pattern used to identify static side comments $static_side_comment_pattern = '^##'; # allow the user to change it if ( $rOpts->{'static-side-comment-prefix'} ) { my $prefix = $rOpts->{'static-side-comment-prefix'}; $prefix =~ s/^\s*//; my $pattern = '^' . $prefix; if ( bad_pattern($pattern) ) { Die( "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n" ); } $static_side_comment_pattern = $pattern; } return; } ## end sub make_static_side_comment_pattern sub make_closing_side_comment_prefix { # Be sure we have a valid closing side comment prefix my $csc_prefix = $rOpts->{'closing-side-comment-prefix'}; my $csc_prefix_pattern; if ( !defined($csc_prefix) ) { $csc_prefix = '## end'; $csc_prefix_pattern = '^##\s+end'; } else { my $test_csc_prefix = $csc_prefix; if ( $test_csc_prefix !~ /^#/ ) { $test_csc_prefix = '#' . $test_csc_prefix; } # make a regex to recognize the prefix my $test_csc_prefix_pattern = $test_csc_prefix; # escape any special characters $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g; $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern; # allow exact number of intermediate spaces to vary $test_csc_prefix_pattern =~ s/\s+/\\s\+/g; # make sure we have a good pattern # if we fail this we probably have an error in escaping # characters. if ( bad_pattern($test_csc_prefix_pattern) ) { # shouldn't happen..must have screwed up escaping, above if (DEVEL_MODE) { Fault(<<EOM); Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern' EOM } # just warn and keep going with defaults Warn( "Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n" ); Warn("Please consider using a simpler -cscp prefix\n"); Warn("Using default -cscp instead; please check output\n"); } else { $csc_prefix = $test_csc_prefix; $csc_prefix_pattern = $test_csc_prefix_pattern; } } $rOpts->{'closing-side-comment-prefix'} = $csc_prefix; $closing_side_comment_prefix_pattern = $csc_prefix_pattern; return; } ## end sub make_closing_side_comment_prefix ################################################## # CODE SECTION 4: receive lines from the tokenizer ################################################## { ## begin closure write_line my $nesting_depth; # Variables used by sub check_sequence_numbers: my $last_seqno; my %saw_opening_seqno; my %saw_closing_seqno; my $initial_seqno; sub initialize_write_line { $nesting_depth = undef; $last_seqno = SEQ_ROOT; %saw_opening_seqno = (); %saw_closing_seqno = (); return; } ## end sub initialize_write_line sub check_sequence_numbers { # Routine for checking sequence numbers. This only needs to be # done occasionally in DEVEL_MODE to be sure everything is working # correctly. my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_; my $jmax = @{$rtokens} - 1; return if ( $jmax < 0 ); foreach my $j ( 0 .. $jmax ) { my $seqno = $rtype_sequence->[$j]; my $token = $rtokens->[$j]; my $type = $rtoken_type->[$j]; $seqno = EMPTY_STRING unless ( defined($seqno) ); my $err_msg = "Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n"; if ( !$seqno ) { # Sequence numbers are generated for opening tokens, so every opening # token should be sequenced. Closing tokens will be unsequenced # if they do not have a matching opening token. if ( $is_opening_sequence_token{$token} && $type ne 'q' && $type ne 'Q' ) { Fault( <<EOM $err_msg Unexpected opening token without sequence number EOM ); } } else { # Save starting seqno to identify sequence method: # New method starts with 2 and has continuous numbering # Old method starts with >2 and may have gaps if ( !defined($initial_seqno) ) { $initial_seqno = $seqno } if ( $is_opening_sequence_token{$token} ) { # New method should have continuous numbering if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) { Fault( <<EOM $err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno EOM ); } $last_seqno = $seqno; # Numbers must be unique if ( $saw_opening_seqno{$seqno} ) { my $lno = $saw_opening_seqno{$seqno}; Fault( <<EOM $err_msg Already saw an opening tokens at line $lno with this sequence number EOM ); } $saw_opening_seqno{$seqno} = $input_line_no; } # only one closing item per seqno elsif ( $is_closing_sequence_token{$token} ) { if ( $saw_closing_seqno{$seqno} ) { my $lno = $saw_closing_seqno{$seqno}; Fault( <<EOM $err_msg Already saw a closing token with this seqno at line $lno EOM ); } $saw_closing_seqno{$seqno} = $input_line_no; # Every closing seqno must have an opening seqno if ( !$saw_opening_seqno{$seqno} ) { Fault( <<EOM $err_msg Saw a closing token but no opening token with this seqno EOM ); } } # Sequenced items must be opening or closing else { Fault( <<EOM $err_msg Unexpected token type with a sequence number EOM ); } } } return; } ## end sub check_sequence_numbers sub store_block_type { my ( $self, $block_type, $seqno ) = @_; return if ( !$block_type ); # Save the type of a block in a hash using sequence number as key $self->[_rblock_type_of_seqno_]->{$seqno} = $block_type; # and save named subs and anynymous subs in separate hashes so that # we only have to do the pattern tests once. if ( $matches_ASUB{$block_type} ) { $self->[_ris_asub_block_]->{$seqno} = 1; } elsif ( $block_type =~ /$SUB_PATTERN/ ) { $self->[_ris_sub_block_]->{$seqno} = 1; } else { ## ok - not a sub } return; } ## end sub store_block_type # hash keys which are common to old and new line_of_tokens my @common_keys; BEGIN { @common_keys = qw( _curly_brace_depth _ending_in_quote _guessed_indentation_level _line_number _line_text _line_type _paren_depth _quote_character _square_bracket_depth _starting_in_quote ); } sub write_line { # This routine receives lines one-by-one from the tokenizer and stores # them in a format suitable for further processing. After the last # line has been sent, the tokenizer will call sub 'finish_formatting' # to do the actual formatting. my ( $self, $line_of_tokens_old ) = @_; my $rLL = $self->[_rLL_]; my $line_of_tokens = {}; # copy common hash key values @{$line_of_tokens}{@common_keys} = @{$line_of_tokens_old}{@common_keys}; my $line_type = $line_of_tokens_old->{_line_type}; my $tee_output; my $Klimit = $self->[_Klimit_]; my $Kfirst; # Handle line of non-code if ( $line_type ne 'CODE' ) { $tee_output ||= $rOpts_tee_pod && substr( $line_type, 0, 3 ) eq 'POD'; $line_of_tokens->{_level_0} = 0; $line_of_tokens->{_ci_level_0} = 0; $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; $line_of_tokens->{_ended_in_blank_token} = undef; } # Handle line of code else { my $rtokens = $line_of_tokens_old->{_rtokens}; my $jmax = @{$rtokens} - 1; if ( $jmax >= 0 ) { $Kfirst = defined($Klimit) ? $Klimit + 1 : 0; #---------------------------- # get the tokens on this line #---------------------------- $self->write_line_inner_loop( $line_of_tokens_old, $line_of_tokens ); # update Klimit for added tokens $Klimit = @{$rLL} - 1; } ## end if ( $jmax >= 0 ) else { # blank line $line_of_tokens->{_level_0} = 0; $line_of_tokens->{_ci_level_0} = 0; $line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING; $line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING; $line_of_tokens->{_ended_in_blank_token} = undef; } $tee_output ||= $rOpts_tee_block_comments && $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#'; $tee_output ||= $rOpts_tee_side_comments && defined($Kfirst) && $Klimit > $Kfirst && $rLL->[$Klimit]->[_TYPE_] eq '#'; } ## end if ( $line_type eq 'CODE') # Finish storing line variables $line_of_tokens->{_rK_range} = [ $Kfirst, $Klimit ]; $self->[_Klimit_] = $Klimit; my $rlines = $self->[_rlines_]; push @{$rlines}, $line_of_tokens; if ($tee_output) { my $fh_tee = $self->[_fh_tee_]; my $line_text = $line_of_tokens_old->{_line_text}; $fh_tee->print($line_text) if ($fh_tee); } return; } ## end sub write_line sub write_line_inner_loop { my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_; #--------------------------------------------------------------------- # Copy the tokens on one line received from the tokenizer to their new # storage locations. #--------------------------------------------------------------------- # Input parameters: # $line_of_tokens_old = line received from tokenizer # $line_of_tokens = line of tokens being formed for formatter my $rtokens = $line_of_tokens_old->{_rtokens}; my $jmax = @{$rtokens} - 1; if ( $jmax < 0 ) { # safety check; shouldn't happen DEVEL_MODE && Fault("unexpected jmax=$jmax\n"); return; } my $line_index = $line_of_tokens_old->{_line_number} - 1; my $rtoken_type = $line_of_tokens_old->{_rtoken_type}; my $rblock_type = $line_of_tokens_old->{_rblock_type}; my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence}; my $rlevels = $line_of_tokens_old->{_rlevels}; my $rLL = $self->[_rLL_]; my $rSS = $self->[_rSS_]; my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; DEVEL_MODE && check_sequence_numbers( $rtokens, $rtoken_type, $rtype_sequence, $line_index + 1 ); # Find the starting nesting depth ... # It must be the value of variable 'level' of the first token # because the nesting depth is used as a token tag in the # vertical aligner and is compared to actual levels. # So vertical alignment problems will occur with any other # starting value. if ( !defined($nesting_depth) ) { $nesting_depth = $rlevels->[0]; $nesting_depth = 0 if ( $nesting_depth < 0 ); $rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1; } my $j = -1; # NOTE: coding efficiency is critical in this loop over all tokens foreach my $token ( @{$rtokens} ) { # NOTE: Do not clip the 'level' variable yet if it is negative. We # will do that later, in sub 'store_token_to_go'. The reason is # that in files with level errors, the logic in 'weld_cuddled_else' # uses a stack logic that will give bad welds if we clip levels # here. (A recent update will probably not even allow negative # levels to arrive here any longer). my $seqno = EMPTY_STRING; # Handle tokens with sequence numbers ... # note the ++ increment hidden here for efficiency if ( $rtype_sequence->[ ++$j ] ) { $seqno = $rtype_sequence->[$j]; my $sign = 1; if ( $is_opening_token{$token} ) { $self->[_K_opening_container_]->{$seqno} = @{$rLL}; $rdepth_of_opening_seqno->[$seqno] = $nesting_depth; $nesting_depth++; # Save a sequenced block type at its opening token. # Note that unsequenced block types can occur in # unbalanced code with errors but are ignored here. $self->store_block_type( $rblock_type->[$j], $seqno ) if ( $rblock_type->[$j] ); } elsif ( $is_closing_token{$token} ) { # The opening depth should always be defined, and # it should equal $nesting_depth-1. To protect # against unforseen error conditions, however, we # will check this and fix things if necessary. For # a test case see issue c055. my $opening_depth = $rdepth_of_opening_seqno->[$seqno]; if ( !defined($opening_depth) ) { $opening_depth = $nesting_depth - 1; $opening_depth = 0 if ( $opening_depth < 0 ); $rdepth_of_opening_seqno->[$seqno] = $opening_depth; # This is not fatal but should not happen. The # tokenizer generates sequence numbers # incrementally upon encountering each new # opening token, so every positive sequence # number should correspond to an opening token. DEVEL_MODE && Fault(<<EOM); No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth EOM } $self->[_K_closing_container_]->{$seqno} = @{$rLL}; $nesting_depth = $opening_depth; $sign = -1; } elsif ( $token eq '?' ) { $self->[_K_opening_ternary_]->{$seqno} = @{$rLL}; } elsif ( $token eq ':' ) { $sign = -1; $self->[_K_closing_ternary_]->{$seqno} = @{$rLL}; } # The only sequenced types output by the tokenizer are # the opening & closing containers and the ternary # types. So we would only get here if the tokenizer has # been changed to mark some other tokens with sequence # numbers, or if an error has been introduced in a # hash such as %is_opening_container else { DEVEL_MODE && Fault(<<EOM); Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer. Expecting only opening or closing container tokens or ternary tokens with sequence numbers. EOM } if ( $sign > 0 ) { $self->[_Iss_opening_]->[$seqno] = @{$rSS}; # For efficiency, we find the maximum level of # opening tokens of any type. The actual maximum # level will be that of their contents which is 1 # greater. That will be fixed in sub # 'finish_formatting'. my $level = $rlevels->[$j]; if ( $level > $self->[_maximum_level_] ) { $self->[_maximum_level_] = $level; $self->[_maximum_level_at_line_] = $line_index + 1; } } else { $self->[_Iss_closing_]->[$seqno] = @{$rSS} } push @{$rSS}, $sign * $seqno; } # Here we are storing the first five variables per token. The # remaining token variables will be added later as follows: # _TOKEN_LENGTH_ is added by sub store_token # _CUMULATIVE_LENGTH_ is added by sub store_token # _KNEXT_SEQ_ITEM_ is added by sub respace_post_loop_ops # _CI_LEVEL_ is added by sub set_ci # So all token variables are available for use after sub set_ci. my @tokary; $tokary[_TOKEN_] = $token; $tokary[_TYPE_] = $rtoken_type->[$j]; $tokary[_TYPE_SEQUENCE_] = $seqno; $tokary[_LEVEL_] = $rlevels->[$j]; $tokary[_LINE_INDEX_] = $line_index; push @{$rLL}, \@tokary; } ## end token loop # Need to remember if we can trim the input line $line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b'; # Values needed by Logger $line_of_tokens->{_level_0} = $rlevels->[0]; $line_of_tokens->{_ci_level_0} = 0; # sub set_ci will fix this $line_of_tokens->{_nesting_blocks_0} = $line_of_tokens_old->{_nesting_blocks_0}; $line_of_tokens->{_nesting_tokens_0} = $line_of_tokens_old->{_nesting_tokens_0}; return; } ## end sub write_line_inner_loop } ## end closure write_line ############################################# # CODE SECTION 5: Pre-process the entire file ############################################# sub finish_formatting { my ( $self, $severe_error ) = @_; # The file has been tokenized and is ready to be formatted. # All of the relevant data is stored in $self, ready to go. # Returns: # true if input file was copied verbatim due to errors # false otherwise # Some of the code in sub break_lists is not robust enough to process code # with arbitrary brace errors. The simplest fix is to just return the file # verbatim if there are brace errors. This fixes issue c160. $severe_error ||= get_saw_brace_error(); # Check the maximum level. If it is extremely large we will give up and # output the file verbatim. Note that the actual maximum level is 1 # greater than the saved value, so we fix that here. $self->[_maximum_level_] += 1; my $maximum_level = $self->[_maximum_level_]; my $maximum_table_index = $#maximum_line_length_at_level; if ( !$severe_error && $maximum_level >= $maximum_table_index ) { $severe_error ||= 1; Warn(<<EOM); The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index. Something may be wrong; formatting will be skipped. EOM } # Dump any requested block summary data if ( $rOpts->{'dump-block-summary'} ) { if ($severe_error) { Exit(1) } $self->dump_block_summary(); Exit(0); } # output file verbatim if severe error or no formatting requested if ( $severe_error || $rOpts->{notidy} ) { $self->dump_verbatim(); $self->wrapup($severe_error); return 1; } # Update the 'save_logfile' flag based to include any tokenization errors. # We can save time by skipping logfile calls if it is not going to be saved. my $logger_object = $self->[_logger_object_]; if ($logger_object) { my $save_logfile = $logger_object->get_save_logfile(); $self->[_save_logfile_] = $save_logfile; my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->set_save_logfile($save_logfile); } { my $rix_side_comments = $self->set_CODE_type(); $self->find_non_indenting_braces($rix_side_comments); # Handle any requested side comment deletions. It is easier to get # this done here rather than farther down the pipeline because IO # lines take a different route, and because lines with deleted HSC # become BL lines. We have already handled any tee requests in sub # getline, so it is safe to delete side comments now. $self->delete_side_comments($rix_side_comments) if ( $rOpts_delete_side_comments || $rOpts_delete_closing_side_comments ); } # Verify that the line hash does not have any unknown keys. $self->check_line_hashes() if (DEVEL_MODE); { # Make a pass through all tokens, adding or deleting any whitespace as # required. Also make any other changes, such as adding semicolons. # All token changes must be made here so that the token data structure # remains fixed for the rest of this iteration. my ( $error, $rqw_lines ) = $self->respace_tokens(); if ($error) { $self->dump_verbatim(); $self->wrapup(); return 1; } # sub 'set_ci' is called after sub respace to allow use of type counts # Token variable _CI_LEVEL_ is only defined after this call $self->set_ci(); $self->find_multiline_qw($rqw_lines); } $self->examine_vertical_tightness_flags(); $self->set_excluded_lp_containers(); $self->keep_old_line_breaks(); # Implement any welding needed for the -wn or -cb options $self->weld_containers(); # Collect info needed to implement the -xlp style $self->xlp_collapsed_lengths() if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses ); # Locate small nested blocks which should not be broken $self->mark_short_nested_blocks(); $self->special_indentation_adjustments(); # Verify that the main token array looks OK. If this ever causes a fault # then place similar checks before the sub calls above to localize the # problem. $self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE); # Finishes formatting and write the result to the line sink. # Eventually this call should just change the 'rlines' data according to the # new line breaks and then return so that we can do an internal iteration # before continuing with the next stages of formatting. $self->process_all_lines(); # A final routine to tie up any loose ends $self->wrapup(); return; } ## end sub finish_formatting my %is_loop_type; BEGIN { my @q = qw( for foreach while do until ); @{is_loop_type}{@q} = (1) x scalar(@q); } sub find_level_info { # Find level ranges and total variations of all code blocks in this file. # Returns: # ref to hash with block info, with seqno as key (see below) my ($self) = @_; # The array _rSS_ has the complete container tree for this file. my $rSS = $self->[_rSS_]; # We will be ignoring everything except code block containers my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my @stack; my %level_info; # TREE_LOOP: foreach my $sseq ( @{$rSS} ) { my $stack_depth = @stack; my $seq_next = $sseq > 0 ? $sseq : -$sseq; next if ( !$rblock_type_of_seqno->{$seq_next} ); if ( $sseq > 0 ) { # STACK_LOOP: my $item; foreach my $seq (@stack) { $item = $level_info{$seq}; if ( $item->{maximum_depth} < $stack_depth ) { $item->{maximum_depth} = $stack_depth; } $item->{block_count}++; } ## end STACK LOOP push @stack, $seq_next; my $block_type = $rblock_type_of_seqno->{$seq_next}; # If this block is a loop nested within a loop, then we # will mark it as an 'inner_loop'. This is a useful # complexity measure. my $is_inner_loop = 0; if ( $is_loop_type{$block_type} && defined($item) ) { $is_inner_loop = $is_loop_type{ $item->{block_type} }; } $level_info{$seq_next} = { starting_depth => $stack_depth, maximum_depth => $stack_depth, block_count => 1, block_type => $block_type, is_inner_loop => $is_inner_loop, }; } else { my $seq_test = pop @stack; # error check if ( $seq_test != $seq_next ) { # Shouldn't happen - the $rSS array must have an error DEVEL_MODE && Fault("stack error finding total depths\n"); %level_info = (); last; } } } ## end TREE_LOOP return \%level_info; } ## end sub find_level_info sub find_loop_label { my ( $self, $seqno ) = @_; # Given: # $seqno = sequence number of a block of code for a loop # Return: # $label = the loop label text, if any, or an empty string my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $K_opening_container = $self->[_K_opening_container_]; my $label = EMPTY_STRING; my $K_opening = $K_opening_container->{$seqno}; # backup to the line with the opening paren, if any, in case the # keyword is on a different line my $Kp = $self->K_previous_code($K_opening); return $label unless ( defined($Kp) ); if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) { $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_]; $K_opening = $K_opening_container->{$seqno}; } return $label unless ( defined($K_opening) ); my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; # look for a label within a few lines; allow a couple of blank lines foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) { last if ( $lx < 0 ); my $line_of_tokens = $rlines->[$lx]; my $line_type = $line_of_tokens->{_line_type}; # stop search on a non-code line last if ( $line_type ne 'CODE' ); my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; # skip a blank line next if ( !defined($Kfirst) ); # check for a lable if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) { $label = $rLL->[$Kfirst]->[_TOKEN_]; last; } # quit the search if we are above the starting line last if ( $lx < $lx_open ); } return $label; } ## end sub find_loop_label { ## closure find_mccabe_count my %is_mccabe_logic_keyword; my %is_mccabe_logic_operator; BEGIN { my @q = (qw( && || ||= &&= ? <<= >>= )); @is_mccabe_logic_operator{@q} = (1) x scalar(@q); @q = (qw( and or xor if else elsif unless until while for foreach )); @is_mccabe_logic_keyword{@q} = (1) x scalar(@q); } ## end BEGIN sub find_mccabe_count { my ($self) = @_; # Find the cumulative mccabe count to each token # Return '$rmccabe_count_sum' = ref to array with cumulative # mccabe count to each token $K # NOTE: This sub currently follows the definitions in Perl::Critic my $rmccabe_count_sum; my $rLL = $self->[_rLL_]; my $count = 0; my $Klimit = $self->[_Klimit_]; foreach my $KK ( 0 .. $Klimit ) { $rmccabe_count_sum->{$KK} = $count; my $type = $rLL->[$KK]->[_TYPE_]; if ( $type eq 'k' ) { my $token = $rLL->[$KK]->[_TOKEN_]; if ( $is_mccabe_logic_keyword{$token} ) { $count++ } } else { if ( $is_mccabe_logic_operator{$type} ) { $count++; } } } $rmccabe_count_sum->{ $Klimit + 1 } = $count; return $rmccabe_count_sum; } ## end sub find_mccabe_count } ## end closure find_mccabe_count sub find_code_line_count { my ($self) = @_; # Find the cumulative number of lines of code, excluding blanks, # comments and pod. # Return '$rcode_line_count' = ref to array with cumulative # code line count for each input line number. my $rcode_line_count; my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $ix_line = -1; my $code_line_count = 0; # loop over all lines foreach my $line_of_tokens ( @{$rlines} ) { $ix_line++; # what type of line? my $line_type = $line_of_tokens->{_line_type}; # if 'CODE' it must be non-blank and non-comment if ( $line_type eq 'CODE' ) { my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; if ( defined($Kfirst) ) { # it is non-blank my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) { # ok, it is a non-comment $code_line_count++; } } } # Count all other special line types except pod; # For a list of line types see sub 'process_all_lines' else { if ( $line_type !~ /^POD/ ) { $code_line_count++ } } # Store the cumulative count using the input line index $rcode_line_count->[$ix_line] = $code_line_count; } return $rcode_line_count; } ## end sub find_code_line_count sub find_selected_packages { my ( $self, $rdump_block_types ) = @_; # returns a list of all selected package statements in a file my @package_list; if ( !$rdump_block_types->{'*'} && !$rdump_block_types->{'package'} && !$rdump_block_types->{'class'} ) { return \@package_list; } my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; my $rlines = $self->[_rlines_]; my $K_closing_container = $self->[_K_closing_container_]; my @package_sweep; foreach my $KK ( 0 .. $Klimit ) { my $item = $rLL->[$KK]; my $type = $item->[_TYPE_]; # fix for c250: package type has changed from 'i' to 'P' next if ( $type ne 'P' ); my $token = $item->[_TOKEN_]; if ( substr( $token, 0, 7 ) eq 'package' && $token =~ /^package\s/ || substr( $token, 0, 5 ) eq 'class' && $token =~ /^class\s/ ) { $token =~ s/\s+/ /g; my ( $keyword, $name ) = split /\s+/, $token, 2; my $lx_start = $item->[_LINE_INDEX_]; my $level = $item->[_LEVEL_]; my $parent_seqno = $self->parent_seqno_by_K($KK); # Skip a class BLOCK because it will be handled as a block if ( $keyword eq 'class' ) { my $line_of_tokens = $rlines->[$lx_start]; my $rK_range = $line_of_tokens->{_rK_range}; my ( $K_first, $K_last ) = @{$rK_range}; if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) { $K_last = $self->K_previous_code($K_last); } if ( defined($K_last) ) { my $seqno_class = $rLL->[$K_last]->[_TYPE_SEQUENCE_]; my $block_type_next = $self->[_rblock_type_of_seqno_]->{$seqno_class}; # these block types are currently marked 'package' # but may be 'class' in the future, so allow both. if ( defined($block_type_next) && $block_type_next =~ /^(class|package)\b/ ) { next; } } } my $K_closing = $Klimit; if ( $parent_seqno != SEQ_ROOT ) { my $Kc = $K_closing_container->{$parent_seqno}; if ( defined($Kc) ) { $K_closing = $Kc; } } # This package ends any previous package at this level if ( defined( my $ix = $package_sweep[$level] ) ) { my $rpk = $package_list[$ix]; my $Kc = $rpk->{K_closing}; if ( $Kc > $KK ) { $rpk->{K_closing} = $KK - 1; } } $package_sweep[$level] = @package_list; # max_change and block_count are not currently reported 'package' push @package_list, { line_start => $lx_start + 1, K_opening => $KK, K_closing => $Klimit, name => $name, type => $keyword, level => $level, max_change => 0, block_count => 0, }; } } return \@package_list; } ## end sub find_selected_packages sub find_selected_blocks { my ( $self, $rdump_block_types ) = @_; # Find blocks needed for --dump-block-summary # Returns: # $rslected_blocks = ref to a list of information on the selected blocks my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $ris_asub_block = $self->[_ris_asub_block_]; my $ris_sub_block = $self->[_ris_sub_block_]; my $dump_all_types = $rdump_block_types->{'*'}; # Get level variation info for code blocks my $rlevel_info = $self->find_level_info(); my @selected_blocks; #--------------------------------------------------- # BEGIN loop over all blocks to find selected blocks #--------------------------------------------------- foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { my $type; my $name = EMPTY_STRING; my $block_type = $rblock_type_of_seqno->{$seqno}; my $K_opening = $K_opening_container->{$seqno}; my $K_closing = $K_closing_container->{$seqno}; my $level = $rLL->[$K_opening]->[_LEVEL_]; my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; my $line_of_tokens = $rlines->[$lx_open]; my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) { my $line_type = $line_of_tokens->{_line_type}; # shouldn't happen my $CODE_type = $line_of_tokens->{_code_type}; DEVEL_MODE && Fault(<<EOM); unexpected line_type=$line_type at line $lx_open, code type=$CODE_type EOM next; } my ( $max_change, $block_count, $inner_loop_plus ) = ( 0, 0, EMPTY_STRING ); my $item = $rlevel_info->{$seqno}; if ( defined($item) ) { my $starting_depth = $item->{starting_depth}; my $maximum_depth = $item->{maximum_depth}; $block_count = $item->{block_count}; $max_change = $maximum_depth - $starting_depth + 1; # this is a '+' character if this block is an inner loops $inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING; } # Skip closures unless type 'closure' is explicitly requested if ( ( $block_type eq '}' || $block_type eq ';' ) && $rdump_block_types->{'closure'} ) { $type = 'closure'; } # Both 'sub' and 'asub' select an anonymous sub. # This allows anonymous subs to be explicitely selected elsif ( $ris_asub_block->{$seqno} && ( $dump_all_types || $rdump_block_types->{'sub'} || $rdump_block_types->{'asub'} ) ) { $type = 'asub'; # Look back to try to find some kind of name, such as # my $var = sub { - var is type 'i' # var => sub { - var is type 'w' # -var => sub { - var is type 'w' # 'var' => sub { - var is type 'Q' my ( $saw_equals, $saw_fat_comma, $blank_count ); foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) { my $token_type = $rLL->[$KK]->[_TYPE_]; if ( $token_type eq 'b' ) { $blank_count++; next } if ( $token_type eq '=>' ) { $saw_fat_comma++; next } if ( $token_type eq '=' ) { $saw_equals++; next } if ( $token_type eq 'i' && $saw_equals || ( $token_type eq 'w' || $token_type eq 'Q' ) && $saw_fat_comma ) { $name = $rLL->[$KK]->[_TOKEN_]; last; } } } elsif ( $ris_sub_block->{$seqno} && ( $dump_all_types || $rdump_block_types->{'sub'} ) ) { $type = 'sub'; # what we want: # $block_type $name # 'sub setidentifier($)' => 'setidentifier' # 'method setidentifier($)' => 'setidentifier' my @parts = split /\s+/, $block_type; $name = $parts[1]; $name =~ s/\(.*$//; } elsif ( $block_type =~ /^(package|class)\b/ && ( $dump_all_types || $rdump_block_types->{'package'} || $rdump_block_types->{'class'} ) ) { $type = 'class'; my @parts = split /\s+/, $block_type; $name = $parts[1]; $name =~ s/\(.*$//; } elsif ( $is_loop_type{$block_type} && ( $dump_all_types || $rdump_block_types->{$block_type} || $rdump_block_types->{ $block_type . $inner_loop_plus } || $rdump_block_types->{$inner_loop_plus} ) ) { $type = $block_type . $inner_loop_plus; } elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) { if ( $is_loop_type{$block_type} ) { $name = $self->find_loop_label($seqno); } $type = $block_type; } else { next; } push @selected_blocks, { K_opening => $K_opening, K_closing => $K_closing, line_start => $lx_open + 1, name => $name, type => $type, level => $level, max_change => $max_change, block_count => $block_count, }; } ## END loop to get info for selected blocks return \@selected_blocks; } ## end sub find_selected_blocks sub dump_block_summary { my ($self) = @_; # Dump information about selected code blocks to STDOUT # This sub is called when # --dump-block-summary (-dbs) is set. # The following controls are available: # --dump-block-types=s (-dbt=s), where s is a list of block types # (if else elsif for foreach while do ... sub) ; default is 'sub' # --dump-block-minimum-lines=n (-dbml=n), where n is the minimum # number of lines for a block to be included; default is 20. my $rOpts_dump_block_types = $rOpts->{'dump-block-types'}; if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' } $rOpts_dump_block_types =~ s/^\s+//; $rOpts_dump_block_types =~ s/\s+$//; my @list = split /\s+/, $rOpts_dump_block_types; my %dump_block_types; @{dump_block_types}{@list} = (1) x scalar(@list); # Get block info my $rselected_blocks = $self->find_selected_blocks( \%dump_block_types ); # Get package info my $rpackage_list = $self->find_selected_packages( \%dump_block_types ); return if ( !@{$rselected_blocks} && !@{$rpackage_list} ); my $input_stream_name = get_input_stream_name(); # Get code line count my $rcode_line_count = $self->find_code_line_count(); # Get mccabe count my $rmccabe_count_sum = $self->find_mccabe_count(); my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'}; if ( !defined($rOpts_dump_block_minimum_lines) ) { $rOpts_dump_block_minimum_lines = 20; } my $rLL = $self->[_rLL_]; # merge blocks and packages, add various counts, filter and print to STDOUT my $routput_lines = []; foreach my $item ( @{$rselected_blocks}, @{$rpackage_list} ) { my $K_opening = $item->{K_opening}; my $K_closing = $item->{K_closing}; # define total number of lines my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_]; my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_]; my $line_count = $lx_close - $lx_open + 1; # define total number of lines of code excluding blanks, comments, pod my $code_lines_open = $rcode_line_count->[$lx_open]; my $code_lines_close = $rcode_line_count->[$lx_close]; my $code_lines = 0; if ( defined($code_lines_open) && defined($code_lines_close) ) { $code_lines = $code_lines_close - $code_lines_open + 1; } # filter out blocks below the selected code line limit if ( $code_lines < $rOpts_dump_block_minimum_lines ) { next; } # add mccabe_count for this block my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 }; my $mccabe_opening = $rmccabe_count_sum->{$K_opening}; my $mccabe_count = 1; # add 1 to match Perl::Critic if ( defined($mccabe_opening) && defined($mccabe_closing) ) { $mccabe_count += $mccabe_closing - $mccabe_opening; } # Store the final set of print variables push @{$routput_lines}, [ $input_stream_name, $item->{line_start}, $line_count, $code_lines, $item->{type}, $item->{name}, $item->{level}, $item->{max_change}, $item->{block_count}, $mccabe_count, ]; } return unless @{$routput_lines}; # Sort blocks and packages on starting line number my @sorted_lines = sort { $a->[1] <=> $b->[1] } @{$routput_lines}; print {*STDOUT} "file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n"; foreach my $rline_vars (@sorted_lines) { my $line = join( ",", @{$rline_vars} ) . "\n"; print {*STDOUT} $line; } return; } ## end sub dump_block_summary sub set_ci { my ($self) = @_; # Set the basic continuation indentation (ci) for all tokens. # This is a replacement for the values previously computed in # sub Perl::Tidy::Tokenizer::tokenizer_wrapup. In most cases it # produces identical results, but in a few cases it is an improvement. use constant DEBUG_SET_CI => 0; # This turns on an optional piece of logic which makes the new and # old computations of ci agree. It has almost no effect on actual # programs but is useful for testing. use constant SET_CI_OPTION_0 => 1; # This is slightly different from the hash in in break_lists # with a similar name (removed '?' and ':' to fix t007 and others) my %is_logical_container_for_ci; my @q = qw# if elsif unless while and or err not && | || ! #; @is_logical_container_for_ci{@q} = (1) x scalar(@q); # This is slightly different from a tokenizer hash with a similar name: my %is_container_label_type_for_ci; @q = qw# k && | || ? : ! #; @is_container_label_type_for_ci{@q} = (1) x scalar(@q); # Undo ci of closing list paren followed by these binary operators: # - initially defined for issue t027, then # - added '=' for t015 # - added '=~' for 'locale.in' # - added '<=>' for 'corelist.in' # Note: # See @value_requestor_type for more that might be included # See also @is_binary_type my %bin_op_type; @q = qw# . ** -> + - / * = != ^ < > % >= <= =~ !~ <=> x #; @bin_op_type{@q} = (1) x scalar(@q); my %is_list_end_type; @q = qw( ; { } ); push @q, ','; @is_list_end_type{@q} = (1) x scalar(@q); my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; return unless defined($Klimit); my $token = ';'; my $type = ';'; my $last_token = $token; my $last_type = $type; my $ci_last = 0; my $ci_next = 0; my $ci_next_next = 1; my $rstack = []; my $seq_root = SEQ_ROOT; my $rparent = { _seqno => $seq_root, _ci_open => 0, _ci_open_next => 0, _ci_close => 0, _ci_close_next => 0, _container_type => 'Block', _ci_next_next => $ci_next_next, _comma_count => 0, _semicolon_count => 0, _Kc => undef, }; # Debug stuff my @debug_lines; my %saw_ci_diff; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $ris_sub_block = $self->[_ris_sub_block_]; my $ris_asub_block = $self->[_ris_asub_block_]; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $K_opening_ternary = $self->[_K_opening_ternary_]; my $K_closing_ternary = $self->[_K_closing_ternary_]; my $rlines = $self->[_rlines_]; my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; my $want_break_before_comma = $want_break_before{','}; my $map_block_follows = sub { # return true if a sort/map/etc block follows the closing brace # of container $seqno my ($seqno) = @_; my $Kc = $K_closing_container->{$seqno}; return unless defined($Kc); my $Kcn = $self->K_next_code($Kc); return unless defined($Kcn); my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_]; #return if ( defined($seqno_n) ); return if ($seqno_n); my $Knn = $self->K_next_code($Kcn); return unless defined($Knn); my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_]; return unless ($seqno_nn); my $K_nno = $K_opening_container->{$seqno_nn}; return unless $K_nno && $K_nno == $Knn; my $block_type = $rblock_type_of_seqno->{$seqno_nn}; if ($block_type) { return $is_block_with_ci{$block_type}; } return; }; my $redo_preceding_comment_ci = sub { # We need to reset the ci of the previous comment(s) my ( $K, $ci ) = @_; my $Km = $self->K_previous_code($K); return if ( !defined($Km) ); foreach my $Kt ( $Km + 1 .. $K - 1 ) { if ( $rLL->[$Kt]->[_TYPE_] eq '#' ) { $rLL->[$Kt]->[_CI_LEVEL_] = $ci; } } return; }; # Definitions of the sequence of ci_values being maintained: # $ci_last = the ci value of the previous non-blank, non-comment token # $ci_this = the ci value to be stored for this token at index $KK # $ci_next = the normal ci for the next token, set by the previous tok # $ci_next_next = the normal next value of $ci_next in this container #-------------------------- # Main loop over all tokens #-------------------------- my $KK = -1; foreach my $rtoken_K ( @{$rLL} ) { $KK++; $type = $rtoken_K->[_TYPE_]; #------------------ # Section 1. Blanks #------------------ if ( $type eq 'b' ) { $rtoken_K->[_CI_LEVEL_] = $ci_next; # 'next' to avoid saving last_ values for blanks and commas next; } #-------------------- # Section 2. Comments #-------------------- if ( $type eq '#' ) { my $ci_this = $ci_next; # If at '#' in ternary before a ? or :, use that level to make # the comment line up with the next ? or : line. (see c202/t052) # i.e. if a nested ? follows, we increase the '#' level by 1, and # if a nested : follows, we decrease the '#' level by 1. # This is the only place where this sub changes a _LEVEL_ value. my $Kn; my $parent_container_type = $rparent->{_container_type}; if ( $parent_container_type eq 'Ternary' ) { $Kn = $self->K_next_code($KK); if ($Kn) { my $type_kn = $rLL->[$Kn]->[_TYPE_]; if ( $is_ternary{$type_kn} ) { my $level_KK = $rLL->[$KK]->[_LEVEL_]; my $level_Kn = $rLL->[$Kn]->[_LEVEL_]; $rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_]; # and use the ci of a terminating ':' if ( $Kn == $rparent->{_Kc} ) { $ci_this = $rparent->{_ci_close}; } } } } # Undo ci for a block comment followed by a closing token or , or ; # provided that the parent container: # - ends without ci, or # - starts ci=0 and is a comma list or this follows a closing type # - has a level jump if ( $ci_this && ( !$rparent->{_ci_close} || ( !$rparent->{_ci_open_next} && ( ( $rparent->{_comma_count} || $last_type eq ',' ) || $is_closing_type{$last_type} ) ) ) ) { # Be sure this is a block comment my $lx = $rtoken_K->[_LINE_INDEX_]; my $rK_range = $rlines->[$lx]->{_rK_range}; my $Kfirst; if ($rK_range) { $Kfirst = $rK_range->[0] } if ( defined($Kfirst) && $Kfirst == $KK ) { # Look for trailing closing token # [ and possibly ',' or ';' ] $Kn = $self->K_next_code($KK) if ( !$Kn ); my $Kc = $rparent->{_Kc}; if ( $Kn && $Kc && ( $Kn == $Kc # only look for comma if -wbb=',' is set # to minimize changes to existing formatting || ( $rLL->[$Kn]->[_TYPE_] eq ',' && $want_break_before_comma && $parent_container_type eq 'List' ) # do not look ahead for a bare ';' because # it changes old formatting with little benefit. ## || ( $rLL->[$Kn]->[_TYPE_] eq ';' ## && $parent_container_type eq 'Block' ) ) ) { # Be sure container has a level jump my $level_KK = $rLL->[$KK]->[_LEVEL_]; my $level_Kc = $rLL->[$Kc]->[_LEVEL_]; if ( $level_Kc < $level_KK ) { $ci_this = 0; } } } } $ci_next = $ci_this; $rtoken_K->[_CI_LEVEL_] = $ci_this; # 'next' to avoid saving last_ values for blanks and commas next; } #------------------------------------------------------------ # Section 3. Continuing with non-blank and non-comment tokens #------------------------------------------------------------ $token = $rtoken_K->[_TOKEN_]; # Set ci values appropriate for most tokens: my $ci_this = $ci_next; $ci_next = $ci_next_next; # Now change these ci values as necessary for special cases... #---------------------------- # Section 4. Container tokens #---------------------------- if ( $rtoken_K->[_TYPE_SEQUENCE_] ) { my $seqno = $rtoken_K->[_TYPE_SEQUENCE_]; #------------------------------------- # Section 4.1 Opening container tokens #------------------------------------- if ( $is_opening_sequence_token{$token} ) { my $level = $rtoken_K->[_LEVEL_]; # Default ci values for the closing token, to be modified # as necessary: my $ci_close = $ci_next; my $ci_close_next = $ci_next_next; my $Kc = $type eq '?' ? $K_closing_ternary->{$seqno} : $K_closing_container->{$seqno}; # $Kn = $self->K_next_nonblank($KK); my $Kn; if ( $KK < $Klimit ) { $Kn = $KK + 1; if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) { $Kn += 1; } } # $Kcn = $self->K_next_code($Kc); my $Kcn; if ( $Kc && $Kc < $Klimit ) { $Kcn = $Kc + 1; if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) { $Kcn += 1; } if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) { $Kcn = $self->K_next_code($Kcn); } } my $opening_level_jump = $Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0; # initialize ci_next_next to its standard value $ci_next_next = 1; # Default: ci of first item of list with level jump is same as # ci of first item of container if ( $opening_level_jump > 0 ) { $ci_next = $rparent->{_ci_open_next}; } my ( $comma_count, $semicolon_count ); my $rtype_count = $rtype_count_by_seqno->{$seqno}; if ($rtype_count) { $comma_count = $rtype_count->{','}; $semicolon_count = $rtype_count->{';'}; # Do not include a terminal semicolon in the count (the # comma_count has already been corrected by respace_tokens) # We only need to know if there are semicolons or not, so # for speed we can just do this test if the count is 1. if ( $semicolon_count && $semicolon_count == 1 ) { my $Kcm = $self->K_previous_code($Kc); if ( $rLL->[$Kcm]->[_TYPE_] eq ';' ) { $semicolon_count--; } } } my $container_type; #------------------------- # Section 4.1.1 Code Block #------------------------- my $block_type = $rblock_type_of_seqno->{$seqno}; if ($block_type) { $container_type = 'Block'; # set default depending on block type $ci_close = 0; my $no_semicolon = $is_block_without_semicolon{$block_type} || $ris_sub_block->{$seqno} || $last_type eq 'J'; if ( !$no_semicolon ) { # Optional fix for block types sort/map/etc which use # zero ci at terminal brace if previous keyword had # zero ci. This will cause sort/map/grep filter blocks # to line up. Note that sub 'undo_ci' will also try to # do this, so this is not a critical operation. if ( $is_block_with_ci{$block_type} ) { my $parent_seqno = $rparent->{_seqno}; my $rtype_count_p = $rtype_count_by_seqno->{$parent_seqno}; if ( # only do this within containers $parent_seqno != SEQ_ROOT # only in containers without ',' and ';' && !$rparent->{_comma_count} && !$rparent->{_semicolon_count} && $map_block_follows->($seqno) ) { if ($ci_last) { $ci_close = $ci_this; } } else { $ci_close = $ci_this; } } # keep ci if certain operators follow (fix c202/t024) if ( !$ci_close && $Kcn ) { my $type_kcn = $rLL->[$Kcn]->[_TYPE_]; my $token_kcn = $rLL->[$Kcn]->[_TOKEN_]; if ( $type_kcn =~ /^(\.|\&\&|\|\|)$/ || $type_kcn eq 'k' && $is_and_or{$token_kcn} ) { $ci_close = $ci_this; } } } if ( $rparent->{_container_type} ne 'Ternary' ) { $ci_this = 0; } $ci_next = 0; $ci_close_next = $ci_close; } #---------------------- # Section 4.1.2 Ternary #---------------------- elsif ( $type eq '?' ) { $container_type = 'Ternary'; if ( $rparent->{_container_type} eq 'List' && !$rparent->{_ci_open_next} ) { $ci_this = 0; $ci_close = 0; } # redo ci of any preceding comments if necessary # at an outermost ? (which has no level jump) if ( !$opening_level_jump ) { $redo_preceding_comment_ci->( $KK, $ci_this ); } } #------------------------------- # Section 4.1.3 Logical or List? #------------------------------- else { my $is_logical = $is_container_label_type_for_ci{$last_type} && $is_logical_container_for_ci{$last_token} # Part 1 of optional patch to get agreement with previous # ci This makes almost no difference in a typical program # because we will seldom break within an array index. || $type eq '[' && SET_CI_OPTION_0; if ( !$is_logical && $token eq '(' ) { # 'foreach' and 'for' paren contents are treated as # logical except for C-style 'for' if ( $last_type eq 'k' ) { $is_logical ||= $last_token eq 'foreach'; # C-style 'for' container will be type 'List' if ( $last_token eq 'for' ) { $is_logical = !( $rtype_count && $rtype_count->{'f'} ); } } # Check for 'for' and 'foreach' loops with iterators elsif ( $last_type eq 'i' && defined($Kcn) ) { my $seqno_kcn = $rLL->[$Kcn]->[_TYPE_SEQUENCE_]; my $type_kcn = $rLL->[$Kcn]->[_TOKEN_]; if ( $seqno_kcn && $type_kcn eq '{' ) { my $block_type_kcn = $rblock_type_of_seqno->{$seqno_kcn}; $is_logical ||= $block_type_kcn && ( $block_type_kcn eq 'for' || $block_type_kcn eq 'foreach' ); } # Search backwards for 'for'/'foreach' with # iterator in case user is running from an editor # and did not include the block (fixes case # 'xci.in'). my $Km = $self->K_previous_code($KK); foreach ( 0 .. 2 ) { $Km = $self->K_previous_code($Km); last unless defined($Km); last unless $rLL->[$Km]->[_TYPE_] eq 'k'; my $tok = $rLL->[$Km]->[_TOKEN_]; next if $tok eq 'my'; $is_logical ||= ( $tok eq 'for' || $tok eq 'foreach' ); last; } } elsif ( $last_token eq '(' ) { $is_logical ||= $rparent->{_container_type} eq 'Logical'; } else { ## ok - none of the above } } #------------------------ # Section 4.1.3.1 Logical #------------------------ if ($is_logical) { $container_type = 'Logical'; # Pass ci though an '!' if ( $last_type eq '!' ) { $ci_this = $ci_last } $ci_next_next = 0; $ci_close_next = $ci_this; # Part 2 of optional patch to get agreement with # previous ci if ( $type eq '[' && SET_CI_OPTION_0 ) { $ci_next_next = $ci_this; # Undo ci at a chain of indexes or hash keys if ( $last_type eq '}' ) { $ci_this = $ci_last; } } if ($opening_level_jump) { $ci_next = 0; } } #--------------------- # Section 4.1.3.2 List #--------------------- else { # Here 'List' is a catchall for none of the above types $container_type = 'List'; # lists in blocks ... if ( $rparent->{_container_type} eq 'Block' ) { # undo ci if another closing token follows if ( defined($Kcn) ) { my $closing_level_jump = $rLL->[$Kcn]->[_LEVEL_] - $level; if ( $closing_level_jump < 0 ) { $ci_close = $ci_this; } } } # lists not in blocks ... else { if ( !$rparent->{_comma_count} ) { $ci_close = $ci_this; # undo ci at binary op after right paren if no # commas in container; fixes t027, t028 if ( $ci_close_next != $ci_close && defined($Kcn) && $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } ) { $ci_close_next = $ci_close; } } if ( $rparent->{_container_type} eq 'Ternary' ) { $ci_next = 0; } } # Undo ci at a chain of indexes or hash keys if ( $token ne '(' && $last_type eq '}' ) { $ci_this = $ci_close = $ci_last; } } } #--------------------------------------- # Section 4.1.4 Store opening token info #--------------------------------------- # Most closing tokens should align with their opening tokens. if ( $type eq '{' && $token ne '(' && $is_list_end_type{$last_type} # avoid asub blocks, which may have prototypes ending in '}' && !$ris_asub_block->{$seqno} ) { $ci_close = $ci_this; } # Closing ci must never be less than opening if ( $ci_close < $ci_this ) { $ci_close = $ci_this } push @{$rstack}, $rparent; $rparent = { _seqno => $seqno, _container_type => $container_type, _ci_next_next => $ci_next_next, _ci_open => $ci_this, _ci_open_next => $ci_next, _ci_close => $ci_close, _ci_close_next => $ci_close_next, _comma_count => $comma_count, _semicolon_count => $semicolon_count, _Kc => $Kc, }; } #------------------------------------- # Section 4.2 Closing container tokens #------------------------------------- else { my $seqno_test = $rparent->{_seqno}; if ( $seqno_test ne $seqno ) { # Shouldn't happen if we are processing balanced text. # (Unbalanced text should go out verbatim) DEVEL_MODE && Fault("stack error: $seqno_test != $seqno\n"); } # Use ci_this, ci_next values set by the matching opening token: $ci_this = $rparent->{_ci_close}; $ci_next = $rparent->{_ci_close_next}; my $ci_open_old = $rparent->{_ci_open}; # Then pop the stack and use the parent ci_next_next value: if ( @{$rstack} ) { $rparent = pop @{$rstack}; $ci_next_next = $rparent->{_ci_next_next}; } else { # Shouldn't happen if we are processing balanced text. DEVEL_MODE && Fault("empty stack - shouldn't happen\n"); } # Fix: undo ci at a closing token followed by a closing token. # Goal is to keep formatting independent of the existence of a # trailing comma or semicolon. if ( $ci_this > 0 && !$ci_open_old && !$rparent->{_ci_close} ) { my $Kc = $rparent->{_Kc}; my $Kn = $self->K_next_code($KK); if ( $Kc && $Kn && $Kc == $Kn ) { $ci_this = $ci_next = 0; } } } } #--------------------------------- # Section 5. Semicolons and Labels #--------------------------------- # The next token after a ';' and label (type 'J') starts a new stmt # The ci after a C-style for ';' (type 'f') is handled similarly. elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) { $ci_next = 0; if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last } } #-------------------- # Section 6. Keywords #-------------------- # Undo ci after a format statement elsif ( $type eq 'k' ) { if ( substr( $token, 0, 6 ) eq 'format' ) { $ci_next = 0; } } #------------------ # Section 7. Commas #------------------ # A comma and the subsequent item normally have ci undone # unless ci has been set at a lower level elsif ( $type eq ',' ) { if ( $rparent->{_container_type} eq 'List' ) { $ci_this = $ci_next = $rparent->{_ci_open_next}; } } #--------------------------------- # Section 8. Hanging side comments #--------------------------------- # Treat hanging side comments like blanks elsif ( $type eq 'q' && $token eq EMPTY_STRING ) { $ci_next = $ci_this; $rtoken_K->[_CI_LEVEL_] = $ci_this; # 'next' to avoid saving last_ values for blanks and commas next; } else { ## ok - not a special type for ci } # Save debug info if requested DEBUG_SET_CI && do { my $seqno = $rtoken_K->[_TYPE_SEQUENCE_]; my $level = $rtoken_K->[_LEVEL_]; my $ci = $rtoken_K->[_CI_LEVEL_]; if ( $ci > 1 ) { $ci = 1 } my $tok = $token; my $last_tok = $last_token; $tok =~ s/\t//g; $last_tok =~ s/\t//g; $tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok; $last_tok = length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok; $tok =~ s/["']//g; $last_tok =~ s/["']//g; my $block_type; $block_type = $rblock_type_of_seqno->{$seqno} if ($seqno); $block_type = EMPTY_STRING unless ($block_type); my $ptype = $rparent->{_container_type}; my $pname = $ptype; my $error = $ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR"; if ($error) { $saw_ci_diff{$KK} = 1 } my $lno = $rtoken_K->[_LINE_INDEX_] + 1; $debug_lines[$KK] = <<EOM; $lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$type\t$tok\t$seqno\t$level\t$pname\t$block_type\t$error EOM }; #---------------------------------- # Store the ci value for this token #---------------------------------- $rtoken_K->[_CI_LEVEL_] = $ci_this; # Remember last nonblank, non-comment token info for the next pass $ci_last = $ci_this; $last_token = $token; $last_type = $type; } ## End main loop over tokens #---------------------- # Post-loop operations: #---------------------- # if the logfile is saved, we need to save the leading ci of # each old line of code. if ( $self->[_save_logfile_] ) { foreach my $line_of_tokens ( @{$rlines} ) { my $line_type = $line_of_tokens->{_line_type}; next if ( $line_type ne 'CODE' ); my ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} }; next if ( !defined($Kfirst) ); $line_of_tokens->{_ci_level_0} = $rLL->[$Kfirst]->[_CI_LEVEL_]; } } if (DEBUG_SET_CI) { my @output_lines; foreach my $KK ( 0 .. $Klimit ) { my $line = $debug_lines[$KK]; if ($line) { my $Kp = $self->K_previous_code($KK); my $Kn = $self->K_next_code($KK); if ( DEBUG_SET_CI > 1 || $Kp && $saw_ci_diff{$Kp} || $saw_ci_diff{$KK} || $Kn && $saw_ci_diff{$Kn} ) { push @output_lines, $line; } } } if (@output_lines) { unshift @output_lines, <<EOM; lno\tci\tci_this\tci_next\tlast_type\tlast_tok\ttype\ttok\tseqno\tlevel\tpname\tblock_type\terror? EOM foreach my $line (@output_lines) { chomp $line; print {*STDOUT} $line, "\n"; } } } return; } ## end sub set_ci sub set_CODE_type { my ($self) = @_; # Examine each line of code and set a flag '$CODE_type' to describe it. # Also return a list of lines with side comments. my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'}; my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'}; my $rOpts_static_block_comment_prefix = $rOpts->{'static-block-comment-prefix'}; # Remember indexes of lines with side comments my @ix_side_comments; my $In_format_skipping_section = 0; my $Saw_VERSION_in_this_file = 0; my $has_side_comment = 0; my $last_line_had_side_comment = 0; my ( $Kfirst, $Klast ); my $CODE_type; # Loop to set CODE_type # Possible CODE_types # 'VB' = Verbatim - line goes out verbatim (a quote) # 'FS' = Format Skipping - line goes out verbatim # 'BL' = Blank Line # 'HSC' = Hanging Side Comment - fix this hanging side comment # 'SBCX'= Static Block Comment Without Leading Space # 'SBC' = Static Block Comment # 'BC' = Block Comment - an ordinary full line comment # 'IO' = Indent Only - line goes out unchanged except for indentation # 'NIN' = No Internal Newlines - line does not get broken # 'VER' = VERSION statement # '' = ordinary line of code with no restrictions my $ix_line = -1; foreach my $line_of_tokens ( @{$rlines} ) { $ix_line++; my $line_type = $line_of_tokens->{_line_type}; my $last_CODE_type = $CODE_type; $CODE_type = EMPTY_STRING; if ( $line_type ne 'CODE' ) { next; } my $input_line = $line_of_tokens->{_line_text}; my $Klast_prev = $Klast; ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} }; my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1; my $is_block_comment; if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) { if ( $jmax == 0 ) { $is_block_comment = 1; } else { $has_side_comment = 1 } } # Write line verbatim if we are in a formatting skip section if ($In_format_skipping_section) { # Note: extra space appended to comment simplifies pattern matching if ( $is_block_comment # optional fast pre-check && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>' || $rOpts_format_skipping_end ) && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ /$format_skipping_pattern_end/ ) { $In_format_skipping_section = 0; my $input_line_no = $line_of_tokens->{_line_number}; write_logfile_entry( "Line $input_line_no: Exiting format-skipping section\n"); } elsif ( $is_block_comment # optional fast pre-check && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<' || $rOpts_format_skipping_begin ) && $rOpts_format_skipping && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ /$format_skipping_pattern_begin/ ) { # warn of duplicate starting comment lines, git #118 my $input_line_no = $line_of_tokens->{_line_number}; warning( "Already in format-skipping section which started at line $In_format_skipping_section\n", $input_line_no ); } else { ## ok - not at a format skipping control line } $CODE_type = 'FS'; next; } # Check for a continued quote.. if ( $line_of_tokens->{_starting_in_quote} ) { # A line which is entirely a quote or pattern must go out # verbatim. Note: the \n is contained in $input_line. if ( $jmax <= 0 ) { if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) { my $input_line_number = $line_of_tokens->{_line_number}; $self->note_embedded_tab($input_line_number); } $CODE_type = 'VB'; next; } } # See if we are entering a formatting skip section if ( $is_block_comment # optional fast pre-check && ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<' || $rOpts_format_skipping_begin ) && $rOpts_format_skipping && ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~ /$format_skipping_pattern_begin/ ) { my $input_line_no = $line_of_tokens->{_line_number}; $In_format_skipping_section = $input_line_no; write_logfile_entry( "Line $input_line_no: Entering format-skipping section\n"); $CODE_type = 'FS'; next; } # ignore trailing blank tokens (they will get deleted later) if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $jmax--; } # blank line.. if ( $jmax < 0 ) { $CODE_type = 'BL'; next; } # Handle comments if ($is_block_comment) { # see if this is a static block comment (starts with ## by default) my $is_static_block_comment = 0; my $no_leading_space = substr( $input_line, 0, 1 ) eq '#'; if ( # optional fast pre-check ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##' || $rOpts_static_block_comment_prefix ) && $rOpts_static_block_comments && $input_line =~ /$static_block_comment_pattern/ ) { $is_static_block_comment = 1; } # Check for comments which are line directives # Treat exactly as static block comments without leading space # reference: perlsyn, near end, section Plain Old Comments (Not!) # example: '# line 42 "new_filename.plx"' if ( $no_leading_space && $input_line =~ m{^\# \s* line \s+ (\d+) \s* (?:\s("?)([^"]+)\2)? \s* $}x ) { $is_static_block_comment = 1; } # look for hanging side comment ... if ( $last_line_had_side_comment # this follows as side comment && !$no_leading_space # with some leading space, and && !$is_static_block_comment # this is not a static comment ) { # continuing an existing HSC chain? if ( $last_CODE_type eq 'HSC' ) { $has_side_comment = 1; $CODE_type = 'HSC'; next; } # starting a new HSC chain? if ( $rOpts->{'hanging-side-comments'} # user is allowing # hanging side comments # like this && ( defined($Klast_prev) && $Klast_prev > 1 ) # and the previous side comment was not static (issue c070) && !( $rOpts->{'static-side-comments'} && $rLL->[$Klast_prev]->[_TOKEN_] =~ /$static_side_comment_pattern/ ) ) { # and it is not a closing side comment (issue c070). my $K_penult = $Klast_prev - 1; $K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' ); my $follows_csc = ( $rLL->[$K_penult]->[_TOKEN_] eq '}' && $rLL->[$K_penult]->[_TYPE_] eq '}' && $rLL->[$Klast_prev]->[_TOKEN_] =~ /$closing_side_comment_prefix_pattern/ ); if ( !$follows_csc ) { $has_side_comment = 1; $CODE_type = 'HSC'; next; } } } if ($is_static_block_comment) { $CODE_type = $no_leading_space ? 'SBCX' : 'SBC'; next; } elsif ($last_line_had_side_comment && !$rOpts_maximum_consecutive_blank_lines && $rLL->[$Kfirst]->[_LEVEL_] > 0 ) { # Emergency fix to keep a block comment from becoming a hanging # side comment. This fix is for the case that blank lines # cannot be inserted. There is related code in sub # 'process_line_of_CODE' $CODE_type = 'SBCX'; next; } else { $CODE_type = 'BC'; next; } } # End of comments. Handle a line of normal code: if ($rOpts_indent_only) { $CODE_type = 'IO'; next; } if ( !$rOpts_add_newlines ) { $CODE_type = 'NIN'; next; } # Patch needed for MakeMaker. Do not break a statement # in which $VERSION may be calculated. See MakeMaker.pm; # this is based on the coding in it. # The first line of a file that matches this will be eval'd: # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ # Examples: # *VERSION = \'1.01'; # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; # We will pass such a line straight through without breaking # it unless -npvl is used. # Patch for problem reported in RT #81866, where files # had been flattened into a single line and couldn't be # tidied without -npvl. There are two parts to this patch: # First, it is not done for a really long line (80 tokens for now). # Second, we will only allow up to one semicolon # before the VERSION. We need to allow at least one semicolon # for statements like this: # require Exporter; our $VERSION = $Exporter::VERSION; # where both statements must be on a single line for MakeMaker if ( !$Saw_VERSION_in_this_file && $jmax < 80 && $input_line =~ /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) { $Saw_VERSION_in_this_file = 1; write_logfile_entry("passing VERSION line; -npvl deactivates\n"); # This code type has lower priority than others $CODE_type = 'VER'; next; } } continue { $line_of_tokens->{_code_type} = $CODE_type; $last_line_had_side_comment = $has_side_comment; if ($has_side_comment) { push @ix_side_comments, $ix_line; $has_side_comment = 0; } } return \@ix_side_comments; } ## end sub set_CODE_type sub find_non_indenting_braces { my ( $self, $rix_side_comments ) = @_; # Find and mark all non-indenting braces in this file. # Given: # $rix_side_comments = index of lines which have side comments # Find and save the line indexes of these special side comments in: # $self->[_rseqno_non_indenting_brace_by_ix_]; # Non-indenting braces are opening braces of the form # { #<<< ... # which do not cause an increase in indentation level. # They are enabled with the --non-indenting-braces, or -nib, flag. return unless ( $rOpts->{'non-indenting-braces'} ); my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $rlines = $self->[_rlines_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $rseqno_non_indenting_brace_by_ix = $self->[_rseqno_non_indenting_brace_by_ix_]; foreach my $ix ( @{$rix_side_comments} ) { my $line_of_tokens = $rlines->[$ix]; my $line_type = $line_of_tokens->{_line_type}; if ( $line_type ne 'CODE' ) { # shouldn't happen DEVEL_MODE && Fault("unexpected line_type=$line_type\n"); next; } my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) { # shouldn't happen DEVEL_MODE && Fault("did not get a comment\n"); next; } next if ( $Klast <= $Kfirst ); # maybe HSC my $token_sc = $rLL->[$Klast]->[_TOKEN_]; my $K_m = $Klast - 1; my $type_m = $rLL->[$K_m]->[_TYPE_]; if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m--; $type_m = $rLL->[$K_m]->[_TYPE_]; } my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; if ($seqno_m) { my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; # The pattern ends in \s but we have removed the newline, so # we added it back for the match. That way we require an exact # match to the special string and also allow additional text. $token_sc .= "\n"; if ( $block_type_m && $is_opening_type{$type_m} && $token_sc =~ /$non_indenting_brace_pattern/ ) { $rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m; } } } return; } ## end sub find_non_indenting_braces sub delete_side_comments { my ( $self, $rix_side_comments ) = @_; # Given a list of indexes of lines with side comments, handle any # requested side comment deletions. my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $rseqno_non_indenting_brace_by_ix = $self->[_rseqno_non_indenting_brace_by_ix_]; foreach my $ix ( @{$rix_side_comments} ) { my $line_of_tokens = $rlines->[$ix]; my $line_type = $line_of_tokens->{_line_type}; # This fault shouldn't happen because we only saved CODE lines with # side comments in the TASK 1 loop above. if ( $line_type ne 'CODE' ) { if (DEVEL_MODE) { my $lno = $ix + 1; Fault(<<EOM); Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE' EOM } next; } my $CODE_type = $line_of_tokens->{_code_type}; my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) { if (DEVEL_MODE) { my $lno = $ix + 1; Fault(<<EOM); Did not find side comment near line $lno while deleting side comments EOM } next; } my $delete_side_comment = $rOpts_delete_side_comments && ( $Klast > $Kfirst || $CODE_type eq 'HSC' ) && (!$CODE_type || $CODE_type eq 'HSC' || $CODE_type eq 'IO' || $CODE_type eq 'NIN' ); # Do not delete special control side comments if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) { $delete_side_comment = 0; } if ( $rOpts_delete_closing_side_comments && !$delete_side_comment && $Klast > $Kfirst && ( !$CODE_type || $CODE_type eq 'HSC' || $CODE_type eq 'IO' || $CODE_type eq 'NIN' ) ) { my $token = $rLL->[$Klast]->[_TOKEN_]; my $K_m = $Klast - 1; my $type_m = $rLL->[$K_m]->[_TYPE_]; if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- } my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_]; if ($seqno_m) { my $block_type_m = $rblock_type_of_seqno->{$seqno_m}; if ( $block_type_m && $token =~ /$closing_side_comment_prefix_pattern/ && $block_type_m =~ /$closing_side_comment_list_pattern/ ) { $delete_side_comment = 1; } } } ## end if ( $rOpts_delete_closing_side_comments...) if ($delete_side_comment) { # We are actually just changing the side comment to a blank. # This may produce multiple blanks in a row, but sub respace_tokens # will check for this and fix it. $rLL->[$Klast]->[_TYPE_] = 'b'; $rLL->[$Klast]->[_TOKEN_] = SPACE; # The -io option outputs the line text, so we have to update # the line text so that the comment does not reappear. if ( $CODE_type eq 'IO' ) { my $line = EMPTY_STRING; foreach my $KK ( $Kfirst .. $Klast - 1 ) { $line .= $rLL->[$KK]->[_TOKEN_]; } $line =~ s/\s+$//; $line_of_tokens->{_line_text} = $line . "\n"; } # If we delete a hanging side comment the line becomes blank. if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' } } } return; } ## end sub delete_side_comments sub dump_verbatim { my $self = shift; # Dump the input file to the output verbatim. This is called when # there is a severe error and formatted output cannot be made. my $rlines = $self->[_rlines_]; foreach my $line ( @{$rlines} ) { my $input_line = $line->{_line_text}; $self->write_unindented_line($input_line); } return; } ## end sub dump_verbatim my %wU; my %wiq; my %is_witPS; my %is_sigil; my %is_nonlist_keyword; my %is_nonlist_type; my %is_s_y_m_slash; my %is_unexpected_equals; my %is_ascii_type; BEGIN { # added 'U' to fix cases b1125 b1126 b1127 my @q = qw(w U); @{wU}{@q} = (1) x scalar(@q); @q = qw(w i q Q G C Z); @{wiq}{@q} = (1) x scalar(@q); @q = qw(w i t P S); # Fix for c250: added new types 'P', 'S', formerly 'i' @{is_witPS}{@q} = (1) x scalar(@q); @q = qw($ & % * @); @{is_sigil}{@q} = (1) x scalar(@q); # Parens following these keywords will not be marked as lists. Note that # 'for' is not included and is handled separately, by including 'f' in the # hash %is_counted_type, since it may or may not be a c-style for loop. @q = qw( if elsif unless and or ); @is_nonlist_keyword{@q} = (1) x scalar(@q); # Parens following these types will not be marked as lists @q = qw( && || ); @is_nonlist_type{@q} = (1) x scalar(@q); @q = qw( s y m / ); @is_s_y_m_slash{@q} = (1) x scalar(@q); @q = qw( = == != ); @is_unexpected_equals{@q} = (1) x scalar(@q); # We can always skip expensive length_function->() calls for these # ascii token types @q = qw# b k L R ; ( { [ ? : ] } ) f t n v F p m pp mm .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <> ( ) <= >= == =~ !~ != ++ -- /= x= ... **= <<= >>= &&= ||= //= <=> + - / * | % ! x ~ = \ ? : . < > ^ & #; push @q, ','; @is_ascii_type{@q} = (1) x scalar(@q); } ## end BEGIN { #<<< begin closure respace_tokens my $rLL_new; # This will be the new array of tokens # These are variables in $self my $rLL; my $length_function; my $K_closing_ternary; my $K_opening_ternary; my $rchildren_of_seqno; my $rhas_broken_code_block; my $rhas_broken_list; my $rhas_broken_list_with_lec; my $rhas_code_block; my $rhas_list; my $rhas_ternary; my $ris_assigned_structure; my $ris_broken_container; my $ris_excluded_lp_container; my $ris_list_by_seqno; my $ris_permanently_broken; my $rlec_count_by_seqno; my $roverride_cab3; my $rparent_of_seqno; my $rtype_count_by_seqno; my $rblock_type_of_seqno; my $K_opening_container; my $K_closing_container; my %K_first_here_doc_by_seqno; my $last_nonblank_code_type; my $last_nonblank_code_token; my $last_nonblank_block_type; my $last_last_nonblank_code_type; my $last_last_nonblank_code_token; my %seqno_stack; my %K_old_opening_by_seqno; my $depth_next; my $depth_next_max; my $cumulative_length; # Variables holding the current line info my $Ktoken_vars; my $Kfirst_old; my $Klast_old; my $Klast_old_code; my $CODE_type; my $rwhitespace_flags; sub initialize_respace_tokens_closure { my ($self) = @_; $rLL_new = []; # This is the new array $rLL = $self->[_rLL_]; $length_function = $self->[_length_function_]; $K_closing_ternary = $self->[_K_closing_ternary_]; $K_opening_ternary = $self->[_K_opening_ternary_]; $rchildren_of_seqno = $self->[_rchildren_of_seqno_]; $rhas_broken_code_block = $self->[_rhas_broken_code_block_]; $rhas_broken_list = $self->[_rhas_broken_list_]; $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; $rhas_code_block = $self->[_rhas_code_block_]; $rhas_list = $self->[_rhas_list_]; $rhas_ternary = $self->[_rhas_ternary_]; $ris_assigned_structure = $self->[_ris_assigned_structure_]; $ris_broken_container = $self->[_ris_broken_container_]; $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; $ris_permanently_broken = $self->[_ris_permanently_broken_]; $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; $roverride_cab3 = $self->[_roverride_cab3_]; $rparent_of_seqno = $self->[_rparent_of_seqno_]; $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; %K_first_here_doc_by_seqno = (); $last_nonblank_code_type = ';'; $last_nonblank_code_token = ';'; $last_nonblank_block_type = EMPTY_STRING; $last_last_nonblank_code_type = ';'; $last_last_nonblank_code_token = ';'; %seqno_stack = (); %K_old_opening_by_seqno = (); # Note: old K index $depth_next = 0; $depth_next_max = 0; # we will be setting token lengths as we go $cumulative_length = 0; $Ktoken_vars = undef; # the old K value of $rtoken_vars $Kfirst_old = undef; # min K of old line $Klast_old = undef; # max K of old line $Klast_old_code = undef; # K of last token if side comment $CODE_type = EMPTY_STRING; # Set the whitespace flags, which indicate the token spacing preference. $rwhitespace_flags = $self->set_whitespace_flags(); # Note that $K_opening_container and $K_closing_container have values # defined in sub get_line() for the previous K indexes. They were needed # in case option 'indent-only' was set, and we didn't get here. We no # longer need those and will eliminate them now to avoid any possible # mixing of old and new values. This must be done AFTER the call to # set_whitespace_flags, which needs these. $K_opening_container = $self->[_K_opening_container_] = {}; $K_closing_container = $self->[_K_closing_container_] = {}; return; } ## end sub initialize_respace_tokens_closure sub respace_tokens { my $self = shift; #-------------------------------------------------------------------------- # This routine is called once per file to do as much formatting as possible # before new line breaks are set. #-------------------------------------------------------------------------- # Return parameters: # Set $severe_error=true if processing must terminate immediately my ( $severe_error, $rqw_lines ); # We change any spaces in --indent-only mode if ( $rOpts->{'indent-only'} ) { # We need to define lengths for -indent-only to avoid undefs, even # though these values are not actually needed for option --indent-only. $rLL = $self->[_rLL_]; $cumulative_length = 0; foreach my $item ( @{$rLL} ) { my $token = $item->[_TOKEN_]; my $token_length = $length_function ? $length_function->($token) : length($token); $cumulative_length += $token_length; $item->[_TOKEN_LENGTH_] = $token_length; $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; } return ( $severe_error, $rqw_lines ); } # This routine makes all necessary and possible changes to the tokenization # after the initial tokenization of the file. This is a tedious routine, # but basically it consists of inserting and deleting whitespace between # nonblank tokens according to the selected parameters. In a few cases # non-space characters are added, deleted or modified. # The goal of this routine is to create a new token array which only needs # the definition of new line breaks and padding to complete formatting. In # a few cases we have to cheat a little to achieve this goal. In # particular, we may not know if a semicolon will be needed, because it # depends on how the line breaks go. To handle this, we include the # semicolon as a 'phantom' which can be displayed as normal or as an empty # string. # Method: The old tokens are copied one-by-one, with changes, from the old # linear storage array $rLL to a new array $rLL_new. # (re-)initialize closure variables for this problem $self->initialize_respace_tokens_closure(); #-------------------------------- # Main over all lines of the file #-------------------------------- my $rlines = $self->[_rlines_]; my $line_type = EMPTY_STRING; my $last_K_out; foreach my $line_of_tokens ( @{$rlines} ) { my $input_line_number = $line_of_tokens->{_line_number}; my $last_line_type = $line_type; $line_type = $line_of_tokens->{_line_type}; next unless ( $line_type eq 'CODE' ); $CODE_type = $line_of_tokens->{_code_type}; if ( $CODE_type eq 'BL' ) { my $seqno = $seqno_stack{ $depth_next - 1 }; if ( defined($seqno) ) { $self->[_rblank_and_comment_count_]->{$seqno} += 1; $self->set_permanently_broken($seqno) if (!$ris_permanently_broken->{$seqno} && $rOpts_maximum_consecutive_blank_lines ); } } my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; next unless defined($Kfirst); ( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast ); $Klast_old_code = $Klast_old; # Be sure an old K value is defined for sub store_token $Ktoken_vars = $Kfirst; # Check for correct sequence of token indexes... # An error here means that sub write_line() did not correctly # package the tokenized lines as it received them. If we # get a fault here it has not output a continuous sequence # of K values. Or a line of CODE may have been mis-marked as # something else. There is no good way to continue after such an # error. if ( defined($last_K_out) ) { if ( $Kfirst != $last_K_out + 1 ) { Fault_Warn( "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst" ); $severe_error = 1; return ( $severe_error, $rqw_lines ); } } else { # The first token should always have been given index 0 by sub # write_line() if ( $Kfirst != 0 ) { Fault("Program Bug: first K is $Kfirst but should be 0"); } } $last_K_out = $Klast; # Handle special lines of code if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) { # CODE_types are as follows. # 'BL' = Blank Line # 'VB' = Verbatim - line goes out verbatim # 'FS' = Format Skipping - line goes out verbatim, no blanks # 'IO' = Indent Only - only indentation may be changed # 'NIN' = No Internal Newlines - line does not get broken # 'HSC'=Hanging Side Comment - fix this hanging side comment # 'BC'=Block Comment - an ordinary full line comment # 'SBC'=Static Block Comment - a block comment which does not get # indented # 'SBCX'=Static Block Comment Without Leading Space # 'VER'=VERSION statement # '' or (undefined) - no restrictions # For a hanging side comment we insert an empty quote before # the comment so that it becomes a normal side comment and # will be aligned by the vertical aligner if ( $CODE_type eq 'HSC' ) { # Safety Check: This must be a line with one token (a comment) my $rvars_Kfirst = $rLL->[$Kfirst]; if ( $Kfirst == $Klast && $rvars_Kfirst->[_TYPE_] eq '#' ) { # Note that even if the flag 'noadd-whitespace' is set, we # will make an exception here and allow a blank to be # inserted to push the comment to the right. We can think # of this as an adjustment of indentation rather than # whitespace between tokens. This will also prevent the # hanging side comment from getting converted to a block # comment if whitespace gets deleted, as for example with # the -extrude and -mangle options. my $rcopy = copy_token_as_type( $rvars_Kfirst, 'q', EMPTY_STRING ); $self->store_token($rcopy); $rcopy = copy_token_as_type( $rvars_Kfirst, 'b', SPACE ); $self->store_token($rcopy); $self->store_token($rvars_Kfirst); next; } else { # This line was mis-marked by sub scan_comment. Catch in # DEVEL_MODE, otherwise try to repair and keep going. Fault( "Program bug. A hanging side comment has been mismarked" ) if (DEVEL_MODE); $CODE_type = EMPTY_STRING; $line_of_tokens->{_code_type} = $CODE_type; } } # Copy tokens unchanged foreach my $KK ( $Kfirst .. $Klast ) { $Ktoken_vars = $KK; $self->store_token( $rLL->[$KK] ); } next; } # Handle normal line.. # Define index of last token before any side comment for comma counts my $type_end = $rLL->[$Klast_old_code]->[_TYPE_]; if ( ( $type_end eq '#' || $type_end eq 'b' ) && $Klast_old_code > $Kfirst_old ) { $Klast_old_code--; if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b' && $Klast_old_code > $Kfirst_old ) { $Klast_old_code--; } } # Insert any essential whitespace between lines # if last line was normal CODE. # Patch for rt #125012: use K_previous_code rather than '_nonblank' # because comments may disappear. # Note that we must do this even if --noadd-whitespace is set if ( $last_line_type eq 'CODE' ) { my $type_next = $rLL->[$Kfirst]->[_TYPE_]; my $token_next = $rLL->[$Kfirst]->[_TOKEN_]; if ( is_essential_whitespace( $last_last_nonblank_code_token, $last_last_nonblank_code_type, $last_nonblank_code_token, $last_nonblank_code_type, $token_next, $type_next, ) ) { $self->store_token(); } } #----------------------------------------------- # Inner loop to respace tokens on a line of code #----------------------------------------------- # The inner loop is in a separate sub for clarity $self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number ); } # End line loop # finalize data structures $self->respace_post_loop_ops(); # Reset memory to be the new array $self->[_rLL_] = $rLL_new; my $Klimit; if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 } $self->[_Klimit_] = $Klimit; # During development, verify that the new array still looks okay. DEVEL_MODE && $self->check_token_array(); # update the token limits of each line ( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens(); return ( $severe_error, $rqw_lines ); } ## end sub respace_tokens sub respace_tokens_inner_loop { my ( $self, $Kfirst, $Klast, $input_line_number ) = @_; #----------------------------------------------------------------- # Loop to copy all tokens on one line, making any spacing changes, # while also collecting information needed by later subs. #----------------------------------------------------------------- foreach my $KK ( $Kfirst .. $Klast ) { # TODO: consider eliminating this closure var by passing directly to # store_token following pattern of store_token_to_go. $Ktoken_vars = $KK; my $rtoken_vars = $rLL->[$KK]; my $type = $rtoken_vars->[_TYPE_]; # Handle a blank space ... if ( $type eq 'b' ) { # Delete it if not wanted by whitespace rules # or we are deleting all whitespace # Note that whitespace flag is a flag indicating whether a # white space BEFORE the token is needed next if ( $KK >= $Klast ); # skip terminal blank my $Knext = $KK + 1; if ($rOpts_freeze_whitespace) { $self->store_token($rtoken_vars); next; } my $ws = $rwhitespace_flags->[$Knext]; if ( $ws == -1 || $rOpts_delete_old_whitespace ) { my $token_next = $rLL->[$Knext]->[_TOKEN_]; my $type_next = $rLL->[$Knext]->[_TYPE_]; my $do_not_delete = is_essential_whitespace( $last_last_nonblank_code_token, $last_last_nonblank_code_type, $last_nonblank_code_token, $last_nonblank_code_type, $token_next, $type_next, ); # Note that repeated blanks will get filtered out here next unless ($do_not_delete); } # make it just one character $rtoken_vars->[_TOKEN_] = SPACE; $self->store_token($rtoken_vars); next; } my $token = $rtoken_vars->[_TOKEN_]; # Handle a sequenced token ... i.e. one of ( ) { } [ ] ? : if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { # One of ) ] } ... if ( $is_closing_token{$token} ) { my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; my $block_type = $rblock_type_of_seqno->{$type_sequence}; #--------------------------------------------- # check for semicolon addition in a code block #--------------------------------------------- if ($block_type) { # if not preceded by a ';' .. if ( $last_nonblank_code_type ne ';' ) { # tentatively insert a semicolon if appropriate $self->add_phantom_semicolon($KK) if $rOpts->{'add-semicolons'}; } } #---------------------------------------------------------- # check for addition/deletion of a trailing comma in a list #---------------------------------------------------------- else { # if this is a list .. my $rtype_count = $rtype_count_by_seqno->{$type_sequence}; if ( $rtype_count && $rtype_count->{','} && !$rtype_count->{';'} && !$rtype_count->{'f'} ) { # if NOT preceded by a comma.. if ( $last_nonblank_code_type ne ',' ) { # insert a comma if requested if ( $rOpts_add_trailing_commas && %trailing_comma_rules ) { $self->add_trailing_comma( $KK, $Kfirst, $trailing_comma_rules{$token} ); } } # if preceded by a comma .. else { # delete a trailing comma if requested my $deleted; if ( $rOpts_delete_trailing_commas && %trailing_comma_rules ) { $deleted = $self->delete_trailing_comma( $KK, $Kfirst, $trailing_comma_rules{$token} ); } # delete a weld-interfering comma if requested if ( !$deleted && $rOpts_delete_weld_interfering_commas && $is_closing_type{ $last_last_nonblank_code_type} ) { $self->delete_weld_interfering_comma($KK); } } } } } } # Modify certain tokens here for whitespace # The following is not yet done, but could be: # sub (x x x) # ( $type =~ /^[witPS]$/ ) elsif ( $is_witPS{$type} ) { # index() is several times faster than a regex test with \s here ## $token =~ /\s/ if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) { # change '$ var' to '$var' etc # change '@ ' to '@' # Examples: <<snippets/space1.in>> my $ord = ord( substr( $token, 1, 1 ) ); if ( # quick test for possible blank at second char $ord > 0 && ( $ord < ORD_PRINTABLE_MIN || $ord > ORD_PRINTABLE_MAX ) ) { my ( $sigil, $word ) = split /\s+/, $token, 2; # $sigil =~ /^[\$\&\%\*\@]$/ ) if ( $is_sigil{$sigil} ) { $token = $sigil; $token .= $word if ( defined($word) ); # fix c104 $rtoken_vars->[_TOKEN_] = $token; } } # trim identifiers of trailing blanks which can occur # under some unusual circumstances, such as if the # identifier 'witch' has trailing blanks on input here: # # sub # witch # () # prototype may be on new line ... # ... my $ord_ch = ord( substr( $token, -1, 1 ) ); if ( # quick check for possible ending space $ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN || $ord_ch > ORD_PRINTABLE_MAX ) ) { $token =~ s/\s+$//g; $rtoken_vars->[_TOKEN_] = $token; } # Fixed for c250 to use 'S' for sub definitions if ( $type eq 'S' ) { # -spp = 0 : no space before opening prototype paren # -spp = 1 : stable (follow input spacing) # -spp = 2 : always space before opening prototype paren if ( !defined($rOpts_space_prototype_paren) || $rOpts_space_prototype_paren == 1 ) { ## default: stable } elsif ( $rOpts_space_prototype_paren == 0 ) { $token =~ s/\s+\(/\(/; } elsif ( $rOpts_space_prototype_paren == 2 ) { $token =~ s/\(/ (/; } else { # bad n value for -spp=n # just use the default } # one space max, and no tabs $token =~ s/\s+/ /g; $rtoken_vars->[_TOKEN_] = $token; $self->[_ris_special_identifier_token_]->{$token} = 'sub'; } # and trim spaces in package statements (added for c250) elsif ( $type eq 'P' ) { # clean up spaces in package identifiers, like # "package Bob::Dog;" if ( $token =~ s/\s+/ /g ) { $rtoken_vars->[_TOKEN_] = $token; $self->[_ris_special_identifier_token_]->{$token} = 'package'; } } else { # it is rare to arrive here (identifier with spaces) } } } # handle semicolons elsif ( $type eq ';' ) { # Remove unnecessary semicolons, but not after bare # blocks, where it could be unsafe if the brace is # mis-tokenized. if ( $rOpts->{'delete-semicolons'} && ( ( $last_nonblank_block_type && $last_nonblank_code_type eq '}' && ( $is_block_without_semicolon{ $last_nonblank_block_type} || $last_nonblank_block_type =~ /$SUB_PATTERN/ || $last_nonblank_block_type =~ /^\w+:$/ ) ) || $last_nonblank_code_type eq ';' ) ) { # This looks like a deletable semicolon, but even if a # semicolon can be deleted it is not necessarily best to do # so. We apply these additional rules for deletion: # - Always ok to delete a ';' at the end of a line # - Never delete a ';' before a '#' because it would # promote it to a block comment. # - If a semicolon is not at the end of line, then only # delete if it is followed by another semicolon or closing # token. This includes the comment rule. It may take # two passes to get to a final state, but it is a little # safer. For example, keep the first semicolon here: # eval { sub bubba { ok(0) }; ok(0) } || ok(1); # It is not required but adds some clarity. my $ok_to_delete = 1; if ( $KK < $Klast ) { my $Kn = $self->K_next_nonblank($KK); if ( defined($Kn) && $Kn <= $Klast ) { my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_]; $ok_to_delete = $next_nonblank_token_type eq ';' || $next_nonblank_token_type eq '}'; } } # do not delete only nonblank token in a file else { my $Kp = $self->K_previous_code( undef, $rLL_new ); my $Kn = $self->K_next_nonblank($KK); $ok_to_delete = defined($Kn) || defined($Kp); } if ($ok_to_delete) { $self->note_deleted_semicolon($input_line_number); next; } else { write_logfile_entry("Extra ';'\n"); } } } # Old patch to add space to something like "x10". # Note: This is now done in the Tokenizer, but this code remains # for reference. elsif ( $type eq 'n' ) { if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) { $token =~ s/x/x /; $rtoken_vars->[_TOKEN_] = $token; if (DEVEL_MODE) { Fault(<<EOM); Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer EOM } } } # check for a qw quote elsif ( $type eq 'q' ) { # trim blanks from right of qw quotes # (To avoid trimming qw quotes use -ntqw; the tokenizer handles # this) $token =~ s/\s*$//; $rtoken_vars->[_TOKEN_] = $token; if ( $self->[_save_logfile_] && $token =~ /\t/ ) { $self->note_embedded_tab($input_line_number); } if ( $rwhitespace_flags->[$KK] == WS_YES && @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' && $rOpts_add_whitespace ) { $self->store_token(); } $self->store_token($rtoken_vars); next; } ## end if ( $type eq 'q' ) # delete repeated commas if requested elsif ( $type eq ',' ) { if ( $last_nonblank_code_type eq ',' && $rOpts->{'delete-repeated-commas'} ) { # Could note this deletion as a possible future update: ## $self->note_deleted_comma($input_line_number); next; } # remember input line index of first comma if -wtc is used if (%trailing_comma_rules) { my $seqno = $seqno_stack{ $depth_next - 1 }; if ( defined($seqno) && !defined( $self->[_rfirst_comma_line_index_]->{$seqno} ) ) { $self->[_rfirst_comma_line_index_]->{$seqno} = $rtoken_vars->[_LINE_INDEX_]; } } } # change 'LABEL :' to 'LABEL:' elsif ( $type eq 'J' ) { $token =~ s/\s+//g; $rtoken_vars->[_TOKEN_] = $token; } # check a quote for problems elsif ( $type eq 'Q' ) { $self->check_Q( $KK, $Kfirst, $input_line_number ) if ( $self->[_save_logfile_] ); } else { ## ok - no special processing for this token type } # Store this token with possible previous blank if ( $rwhitespace_flags->[$KK] == WS_YES && @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' && $rOpts_add_whitespace ) { $self->store_token(); } $self->store_token($rtoken_vars); } # End token loop return; } ## end sub respace_tokens_inner_loop sub respace_post_loop_ops { my ($self) = @_; # Walk backwards through the tokens, making forward links to sequence items. if ( @{$rLL_new} ) { my $KNEXT; foreach my $KK ( reverse( 0 .. @{$rLL_new} - 1 ) ) { $rLL_new->[$KK]->[_KNEXT_SEQ_ITEM_] = $KNEXT; if ( $rLL_new->[$KK]->[_TYPE_SEQUENCE_] ) { $KNEXT = $KK } } $self->[_K_first_seq_item_] = $KNEXT; } # Find and remember lists by sequence number foreach my $seqno ( keys %{$K_opening_container} ) { my $K_opening = $K_opening_container->{$seqno}; next unless defined($K_opening); # code errors may leave undefined closing tokens my $K_closing = $K_closing_container->{$seqno}; next unless defined($K_closing); my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_]; my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_]; my $line_diff = $lx_close - $lx_open; $ris_broken_container->{$seqno} = $line_diff; # See if this is a list my $is_list; my $rtype_count = $rtype_count_by_seqno->{$seqno}; if ($rtype_count) { my $comma_count = $rtype_count->{','}; my $fat_comma_count = $rtype_count->{'=>'}; my $semicolon_count = $rtype_count->{';'}; if ( $rtype_count->{'f'} ) { $semicolon_count += $rtype_count->{'f'}; } # We will define a list to be a container with one or more commas # and no semicolons. Note that we have included the semicolons # in a 'for' container in the semicolon count to keep c-style for # statements from being formatted as lists. if ( ( $comma_count || $fat_comma_count ) && !$semicolon_count ) { $is_list = 1; # We need to do one more check for a parenthesized list: # At an opening paren following certain tokens, such as 'if', # we do not want to format the contents as a list. if ( $rLL_new->[$K_opening]->[_TOKEN_] eq '(' ) { my $Kp = $self->K_previous_code( $K_opening, $rLL_new ); if ( defined($Kp) ) { my $type_p = $rLL_new->[$Kp]->[_TYPE_]; my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; $is_list = $type_p eq 'k' ? !$is_nonlist_keyword{$token_p} : !$is_nonlist_type{$type_p}; } } } } # Look for a block brace marked as uncertain. If the tokenizer thinks # its guess is uncertain for the type of a brace following an unknown # bareword then it adds a trailing space as a signal. We can fix the # type here now that we have had a better look at the contents of the # container. This fixes case b1085. To find the corresponding code in # Tokenizer.pm search for 'b1085' with an editor. my $block_type = $rblock_type_of_seqno->{$seqno}; if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) { # Always remove the trailing space $block_type =~ s/\s+$//; # Try to filter out parenless sub calls my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new ); my $Knn2; if ( defined($Knn1) ) { $Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new ); } my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b'; my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b'; # if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) { if ( $wU{$type_nn1} && $wiq{$type_nn2} ) { $is_list = 0; } # Convert to a hash brace if it looks like it holds a list if ($is_list) { $block_type = EMPTY_STRING; } $rblock_type_of_seqno->{$seqno} = $block_type; } # Handle a list container if ( $is_list && !$block_type ) { $ris_list_by_seqno->{$seqno} = $seqno; my $seqno_parent = $rparent_of_seqno->{$seqno}; my $depth = 0; while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { $depth++; # for $rhas_list we need to save the minimum depth if ( !$rhas_list->{$seqno_parent} || $rhas_list->{$seqno_parent} > $depth ) { $rhas_list->{$seqno_parent} = $depth; } if ($line_diff) { $rhas_broken_list->{$seqno_parent} = 1; # Patch1: We need to mark broken lists with non-terminal # line-ending commas for the -bbx=2 parameter. This insures # that the list will stay broken. Otherwise the flag # -bbx=2 can be unstable. This fixes case b789 and b938. # Patch2: Updated to also require either one fat comma or # one more line-ending comma. Fixes cases b1069 b1070 # b1072 b1076. if ( $rlec_count_by_seqno->{$seqno} && ( $rlec_count_by_seqno->{$seqno} > 1 || $rtype_count_by_seqno->{$seqno}->{'=>'} ) ) { $rhas_broken_list_with_lec->{$seqno_parent} = 1; } } $seqno_parent = $rparent_of_seqno->{$seqno_parent}; } } # Handle code blocks ... # The -lp option needs to know if a container holds a code block elsif ( $block_type && $rOpts_line_up_parentheses ) { my $seqno_parent = $rparent_of_seqno->{$seqno}; while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { $rhas_code_block->{$seqno_parent} = 1; $rhas_broken_code_block->{$seqno_parent} = $line_diff; $seqno_parent = $rparent_of_seqno->{$seqno_parent}; } } else { ## ok - none of the above } } # Find containers with ternaries, needed for -lp formatting. foreach my $seqno ( keys %{$K_opening_ternary} ) { my $seqno_parent = $rparent_of_seqno->{$seqno}; while ( defined($seqno_parent) && $seqno_parent ne SEQ_ROOT ) { $rhas_ternary->{$seqno_parent} = 1; $seqno_parent = $rparent_of_seqno->{$seqno_parent}; } } # Turn off -lp for containers with here-docs with text within a container, # since they have their own fixed indentation. Fixes case b1081. if ($rOpts_line_up_parentheses) { foreach my $seqno ( keys %K_first_here_doc_by_seqno ) { my $Kh = $K_first_here_doc_by_seqno{$seqno}; my $Kc = $K_closing_container->{$seqno}; my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_]; my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_]; next if ( $line_Kh == $line_Kc ); $ris_excluded_lp_container->{$seqno} = 1; } } # Set a flag to turn off -cab=3 in complex structures. Otherwise, # instability can occur. When it is overridden the behavior of the closest # match, -cab=2, will be used instead. This fixes cases b1096 b1113. if ( $rOpts_comma_arrow_breakpoints == 3 ) { foreach my $seqno ( keys %{$K_opening_container} ) { my $rtype_count = $rtype_count_by_seqno->{$seqno}; next unless ( $rtype_count && $rtype_count->{'=>'} ); # override -cab=3 if this contains a sub-list if ( !defined( $roverride_cab3->{$seqno} ) ) { if ( $rhas_list->{$seqno} ) { $roverride_cab3->{$seqno} = 2; } # or if this is a sub-list of its parent container else { my $seqno_parent = $rparent_of_seqno->{$seqno}; if ( defined($seqno_parent) && $ris_list_by_seqno->{$seqno_parent} ) { $roverride_cab3->{$seqno} = 2; } } } } } return; } ## end sub respace_post_loop_ops sub set_permanently_broken { my ( $self, $seqno ) = @_; # Mark this container, and all of its parent containers, as being # permanently broken (for example, by containing a blank line). This # is needed for certain list formatting operations. while ( defined($seqno) ) { $ris_permanently_broken->{$seqno} = 1; $seqno = $rparent_of_seqno->{$seqno}; } return; } ## end sub set_permanently_broken sub store_token { my ( $self, $item ) = @_; #------------------------------------------ # Store one token during respace operations #------------------------------------------ # Input parameter: # if defined => reference to a token # if undef => make and store a blank space # NOTE: called once per token so coding efficiency is critical. # If no arg, then make and store a blank space if ( !$item ) { # - Never start the array with a space, and # - Never store two consecutive spaces if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' ) { # Note that the level and ci_level of newly created spaces should # be the same as the previous token. Otherwise the coding for the # -lp option can create a blinking state in some rare cases. # (see b1109, b1110). $item = []; $item->[_TYPE_] = 'b'; $item->[_TOKEN_] = SPACE; $item->[_TYPE_SEQUENCE_] = EMPTY_STRING; $item->[_LINE_INDEX_] = $rLL_new->[-1]->[_LINE_INDEX_]; $item->[_LEVEL_] = $rLL_new->[-1]->[_LEVEL_]; } else { return } } # The next multiple assignment statements are significantly faster than # doing them one-by-one. my ( $type, $token, $type_sequence, ) = @{$item}[ _TYPE_, _TOKEN_, _TYPE_SEQUENCE_, ]; # Set the token length. Later it may be adjusted again if phantom or # ignoring side comment lengths. It is always okay to calculate the length # with $length_function->() if it is defined, but it is extremely slow so # we avoid it and use the builtin length() for printable ascii tokens. # Note: non-printable ascii characters (like tab) may get different lengths # by the two methods, so we have to use $length_function for them. my $token_length = ( $length_function && !$is_ascii_type{$type} && $token =~ /[[:^ascii:][:^print:]]/ ) ? $length_function->($token) : length($token); # handle blanks if ( $type eq 'b' ) { # Do not output consecutive blanks. This situation should have been # prevented earlier, but it is worth checking because later routines # make this assumption. if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) { return; } } # handle comments elsif ( $type eq '#' ) { # trim comments if necessary my $ord = ord( substr( $token, -1, 1 ) ); if ( $ord > 0 && ( $ord < ORD_PRINTABLE_MIN || $ord > ORD_PRINTABLE_MAX ) && $token =~ s/\s+$// ) { $token_length = $length_function ? $length_function->($token) : length($token); $item->[_TOKEN_] = $token; } my $ignore_sc_length = $rOpts_ignore_side_comment_lengths; # Ignore length of '## no critic' comments even if -iscl is not set if ( !$ignore_sc_length && !$rOpts_ignore_perlcritic_comments && $token_length > 10 && substr( $token, 1, 1 ) eq '#' && $token =~ /^##\s*no\s+critic\b/ ) { # Is it a side comment or a block comment? if ( $Ktoken_vars > $Kfirst_old ) { # This is a side comment. If we do not ignore its length, and # -iscl has not been set, then the line could be broken and # perlcritic will complain. So this is essential: $ignore_sc_length ||= 1; # It would be a good idea to also make this behave like a # static side comment, but this is not essential and would # change existing formatting. So we will leave it to the user # to set -ssc if desired. } else { # This is a full-line (block) comment. # It would be a good idea to make this behave like a static # block comment, but this is not essential and would change # existing formatting. So we will leave it to the user to # set -sbc if desired } } # Set length of ignored side comments as just 1 if ( $ignore_sc_length && ( !$CODE_type || $CODE_type eq 'HSC' ) ) { $token_length = 1; } my $seqno = $seqno_stack{ $depth_next - 1 }; if ( defined($seqno) ) { $self->[_rblank_and_comment_count_]->{$seqno} += 1 if ( $CODE_type eq 'BC' ); $self->set_permanently_broken($seqno) if !$ris_permanently_broken->{$seqno}; } } # handle non-blanks and non-comments else { my $block_type; # check for a sequenced item (i.e., container or ?/:) if ($type_sequence) { # This will be the index of this item in the new array my $KK_new = @{$rLL_new}; if ( $is_opening_token{$token} ) { $K_opening_container->{$type_sequence} = $KK_new; $block_type = $rblock_type_of_seqno->{$type_sequence}; # Fix for case b1100: Count a line ending in ', [' as having # a line-ending comma. Otherwise, these commas can be hidden # with something like --opening-square-bracket-right if ( $last_nonblank_code_type eq ',' && $Ktoken_vars == $Klast_old_code && $Ktoken_vars > $Kfirst_old ) { $rlec_count_by_seqno->{$type_sequence}++; } if ( $last_nonblank_code_type eq '=' || $last_nonblank_code_type eq '=>' ) { $ris_assigned_structure->{$type_sequence} = $last_nonblank_code_type; } my $seqno_parent = $seqno_stack{ $depth_next - 1 }; $seqno_parent = SEQ_ROOT unless defined($seqno_parent); push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence; $rparent_of_seqno->{$type_sequence} = $seqno_parent; $seqno_stack{$depth_next} = $type_sequence; $K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars; $depth_next++; if ( $depth_next > $depth_next_max ) { $depth_next_max = $depth_next; } } elsif ( $is_closing_token{$token} ) { $K_closing_container->{$type_sequence} = $KK_new; $block_type = $rblock_type_of_seqno->{$type_sequence}; # Do not include terminal commas in counts if ( $last_nonblank_code_type eq ',' || $last_nonblank_code_type eq '=>' ) { $rtype_count_by_seqno->{$type_sequence} ->{$last_nonblank_code_type}--; if ( $Ktoken_vars == $Kfirst_old && $last_nonblank_code_type eq ',' && $rlec_count_by_seqno->{$type_sequence} ) { $rlec_count_by_seqno->{$type_sequence}--; } } # Update the stack... $depth_next--; } else { # For ternary, note parent but do not include as child my $seqno_parent = $seqno_stack{ $depth_next - 1 }; $seqno_parent = SEQ_ROOT unless defined($seqno_parent); $rparent_of_seqno->{$type_sequence} = $seqno_parent; # These are not yet used but could be useful if ( $token eq '?' ) { $K_opening_ternary->{$type_sequence} = $KK_new; } elsif ( $token eq ':' ) { $K_closing_ternary->{$type_sequence} = $KK_new; } else { # We really shouldn't arrive here, just being cautious: # The only sequenced types output by the tokenizer are the # opening & closing containers and the ternary types. Each # of those was checked above. So we would only get here # if the tokenizer has been changed to mark some other # tokens with sequence numbers. if (DEVEL_MODE) { Fault( "Unexpected token type with sequence number: type='$type', seqno='$type_sequence'" ); } } } } # Remember the most recent two non-blank, non-comment tokens. # NOTE: the phantom semicolon code may change the output stack # without updating these values. Phantom semicolons are considered # the same as blanks for now, but future needs might change that. # See the related note in sub 'add_phantom_semicolon'. $last_last_nonblank_code_type = $last_nonblank_code_type; $last_last_nonblank_code_token = $last_nonblank_code_token; $last_nonblank_code_type = $type; $last_nonblank_code_token = $token; $last_nonblank_block_type = $block_type; # count selected types if ( $is_counted_type{$type} ) { my $seqno = $seqno_stack{ $depth_next - 1 }; if ( defined($seqno) ) { $rtype_count_by_seqno->{$seqno}->{$type}++; # Count line-ending commas for -bbx if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) { $rlec_count_by_seqno->{$seqno}++; } # Remember index of first here doc target if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) { my $KK_new = @{$rLL_new}; $K_first_here_doc_by_seqno{$seqno} = $KK_new; } } } } # cumulative length is the length sum including this token $cumulative_length += $token_length; $item->[_CUMULATIVE_LENGTH_] = $cumulative_length; $item->[_TOKEN_LENGTH_] = $token_length; # For reference, here is how to get the parent sequence number. # This is not used because it is slower than finding it on the fly # in sub parent_seqno_by_K: # my $seqno_parent = # $type_sequence && $is_opening_token{$token} # ? $seqno_stack{ $depth_next - 2 } # : $seqno_stack{ $depth_next - 1 }; # my $KK = @{$rLL_new}; # $rseqno_of_parent_by_K->{$KK} = $seqno_parent; # and finally, add this item to the new array push @{$rLL_new}, $item; return; } ## end sub store_token sub add_phantom_semicolon { my ( $self, $KK ) = @_; # The token at old index $KK is a closing block brace, and not preceded # by a semicolon. Before we push it onto the new token list, we may # want to add a phantom semicolon which can be activated if the the # block is broken on output. # We are only adding semicolons for certain block types my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; return unless ($type_sequence); my $block_type = $rblock_type_of_seqno->{$type_sequence}; return unless ($block_type); return unless ( $ok_to_add_semicolon_for_block_type{$block_type} || $block_type =~ /^(sub|package)/ || $block_type =~ /^\w+\:$/ ); # Find the most recent token in the new token list my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); return unless ( defined($Kp) ); # shouldn't happen except for bad input my $type_p = $rLL_new->[$Kp]->[_TYPE_]; my $token_p = $rLL_new->[$Kp]->[_TOKEN_]; my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; # Do not add a semicolon if... return if ( # it would follow a comment (and be isolated) $type_p eq '#' # it follows a code block ( because they are not always wanted # there and may add clutter) || $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p} # it would follow a label || $type_p eq 'J' # it would be inside a 'format' statement (and cause syntax error) || ( $type_p eq 'k' && $token_p =~ /format/ ) ); # Do not add a semicolon if it would impede a weld with an immediately # following closing token...like this # { ( some code ) } # ^--No semicolon can go here # look at the previous token... note use of the _NEW rLL array here, # but sequence numbers are invariant. my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_]; # If it is also a CLOSING token we have to look closer... if ( $seqno_inner && $is_closing_token{$token_p} # we only need to look if there is just one inner container.. && defined( $rchildren_of_seqno->{$type_sequence} ) && @{ $rchildren_of_seqno->{$type_sequence} } == 1 ) { # Go back and see if the corresponding two OPENING tokens are also # together. Note that we are using the OLD K indexing here: my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence}; if ( defined($K_outer_opening) ) { my $K_nxt = $self->K_next_nonblank($K_outer_opening); if ( defined($K_nxt) ) { my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_]; # Is the next token after the outer opening the same as # our inner closing (i.e. same sequence number)? # If so, do not insert a semicolon here. return if ( $seqno_nxt && $seqno_nxt == $seqno_inner ); } } } # We will insert an empty semicolon here as a placeholder. Later, if # it becomes the last token on a line, we will bring it to life. The # advantage of doing this is that (1) we just have to check line # endings, and (2) the phantom semicolon has zero width and therefore # won't cause needless breaks of one-line blocks. my $Ktop = -1; if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' && $want_left_space{';'} == WS_NO ) { # convert the blank into a semicolon.. # be careful: we are working on the new stack top # on a token which has been stored. my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); # Convert the existing blank to: # a phantom semicolon for one_line_block option = 0 or 1 # a real semicolon for one_line_block option = 2 my $tok = EMPTY_STRING; my $len_tok = 0; if ( $rOpts_one_line_block_semicolons == 2 ) { $tok = ';'; $len_tok = 1; } $rLL_new->[$Ktop]->[_TOKEN_] = $tok; $rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok; $rLL_new->[$Ktop]->[_TYPE_] = ';'; $self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++; # NOTE: we are changing the output stack without updating variables # $last_nonblank_code_type, etc. Future needs might require that # those variables be updated here. For now, it seems ok to skip # this. # Then store a new blank $self->store_token($rcopy); } else { # Patch for issue c078: keep line indexes in order. If the top # token is a space that we are keeping (due to '-wls=';') then # we have to check that old line indexes stay in order. # In very rare # instances in which side comments have been deleted and converted # into blanks, we may have filtered down multiple blanks into just # one. In that case the top blank may have a higher line number # than the previous nonblank token. Although the line indexes of # blanks are not really significant, we need to keep them in order # in order to pass error checks. if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) { my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; if ( $new_top_ix < $old_top_ix ) { $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; } } my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING ); $self->store_token($rcopy); } return; } ## end sub add_phantom_semicolon sub add_trailing_comma { # Implement the --add-trailing-commas flag to the line end before index $KK: my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; # Input parameter: # $KK = index of closing token in old ($rLL) token list # which starts a new line and is not preceded by a comma # $Kfirst = index of first token on the current line of input tokens # $add_flags = user control flags # For example, we might want to add a comma here: # bless { # _name => $name, # _price => $price, # _rebate => $rebate <------ location of possible bare comma # }, $pkg; # ^-------------------closing token at index $KK on new line # Do not add a comma if it would follow a comment my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); return unless ( defined($Kp) ); my $type_p = $rLL_new->[$Kp]->[_TYPE_]; return if ( $type_p eq '#' ); # see if the user wants a trailing comma here my $match = $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, $trailing_comma_rule, 1 ); # b1458 fix method 1: do not add if this would excess line length. # This is more general than fix method 2, below, but the logic is not # as clean. So this fix is currently deactivated. if ( 0 && $match && $rOpts_delete_trailing_commas && $KK > 0 ) { my $line_index = $rLL->[ $KK - 1 ]->[_LINE_INDEX_]; my $rlines = $self->[_rlines_]; my $line_of_tokens = $rlines->[$line_index]; my $input_line = $line_of_tokens->{_line_text}; my $len = $length_function ? $length_function->($input_line) - 1 : length($input_line) - 1; my $level = $rLL->[$Kfirst]->[_LEVEL_]; my $max_len = $maximum_line_length_at_level[$level]; if ( $len >= $max_len ) { $match = 0; } } # if so, add a comma if ($match) { my $Knew = $self->store_new_token( ',', ',', $Kp ); } return; } ## end sub add_trailing_comma sub delete_trailing_comma { my ( $self, $KK, $Kfirst, $trailing_comma_rule ) = @_; # Apply the --delete-trailing-commas flag to the comma before index $KK # Input parameter: # $KK = index of a closing token in OLD ($rLL) token list # which is preceded by a comma on the same line. # $Kfirst = index of first token on the current line of input tokens # $delete_option = user control flag # Returns true if the comma was deleted # For example, we might want to delete this comma: # my @asset = ("FASMX", "FASGX", "FASIX",); # | |^--------token at index $KK # | ^------comma of interest # ^-------------token at $Kfirst # Verify that the previous token is a comma. Note that we are working in # the new token list $rLL_new. my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); return unless ( defined($Kp) ); if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { # there must be a '#' between the ',' and closing token; give up. return; } # Do not delete commas when formatting under stress to avoid instability. # This fixes b1389, b1390, b1391, b1392. The $high_stress_level has # been found to work well for trailing commas. if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) { return; } # See if the user wants this trailing comma my $match = $self->match_trailing_comma_rule( $KK, $Kfirst, $Kp, $trailing_comma_rule, 0 ); # Patch: the --noadd-whitespace flag can cause instability in complex # structures. In this case do not delete the comma. Fixes b1409. if ( !$match && !$rOpts_add_whitespace ) { my $Kn = $self->K_next_nonblank($KK); if ( defined($Kn) ) { my $type_n = $rLL->[$Kn]->[_TYPE_]; if ( $type_n ne ';' && $type_n ne '#' ) { return } } } # b1458 fix method 2: do not remove a comma after a leading brace type 'R' # since it is under stress and could become unstable. This is a more # specific fix but the logic is cleaner than method 1. if ( !$match && $rOpts_add_trailing_commas && $rLL->[$Kfirst]->[_TYPE_] eq 'R' ) { # previous old token should be the comma.. my $Kp_old = $self->K_previous_nonblank( $KK, $rLL ); if ( defined($Kp_old) && $Kp_old > $Kfirst && $rLL->[$Kp_old]->[_TYPE_] eq ',' ) { # if the comma follows the first token of the line .. my $Kpp_old = $self->K_previous_nonblank( $Kp_old, $rLL ); if ( defined($Kpp_old) && $Kpp_old eq $Kfirst ) { # do not delete it $match = 1; } } } # If no match, delete it if ( !$match ) { return $self->unstore_last_nonblank_token(','); } return; } ## end sub delete_trailing_comma sub delete_weld_interfering_comma { my ( $self, $KK ) = @_; # Apply the flag '--delete-weld-interfering-commas' to the comma # before index $KK # Input parameter: # $KK = index of a closing token in OLD ($rLL) token list # which is preceded by a comma on the same line. # Returns true if the comma was deleted # For example, we might want to delete this comma: # my $tmpl = { foo => {no_override => 1, default => 42}, }; # || ^------$KK # |^---$Kp # $Kpp---^ # # Note that: # index $KK is in the old $rLL array, but # indexes $Kp and $Kpp are in the new $rLL_new array. my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; return unless ($type_sequence); # Find the previous token and verify that it is a comma. my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); return unless ( defined($Kp) ); if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) { # it is not a comma, so give up ( it is probably a '#' ) return; } # This must be the only comma in this list my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; return unless ( defined($rtype_count) && $rtype_count->{','} && $rtype_count->{','} == 1 ); # Back up to the previous closing token my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); return unless ( defined($Kpp) ); my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_]; my $type_pp = $rLL_new->[$Kpp]->[_TYPE_]; # The containers must be nesting (i.e., sequence numbers must differ by 1 ) if ( $seqno_pp && $is_closing_type{$type_pp} ) { if ( $seqno_pp == $type_sequence + 1 ) { # remove the ',' from the top of the new token list return $self->unstore_last_nonblank_token(','); } } return; } ## end sub delete_weld_interfering_comma sub unstore_last_nonblank_token { my ( $self, $type ) = @_; # remove the most recent nonblank token from the new token list # Input parameter: # $type = type to be removed (for safety check) # Returns true if success # false if error # This was written and is used for removing commas, but might # be useful for other tokens. If it is ever used for other tokens # then the issue of what to do about the other variables, such # as token counts and the '$last...' vars needs to be considered. # Safety check, shouldn't happen if ( @{$rLL_new} < 3 ) { DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n"); return; } my ( $rcomma, $rblank ); # case 1: pop comma from top of stack if ( $rLL_new->[-1]->[_TYPE_] eq $type ) { $rcomma = pop @{$rLL_new}; } # case 2: pop blank and then comma from top of stack elsif ($rLL_new->[-1]->[_TYPE_] eq 'b' && $rLL_new->[-2]->[_TYPE_] eq $type ) { $rblank = pop @{$rLL_new}; $rcomma = pop @{$rLL_new}; } # case 3: error, shouldn't happen unless bad call else { DEVEL_MODE && Fault("Could not find token type '$type' to remove\n"); return; } # A note on updating vars set by sub store_token for this comma: If we # reduce the comma count by 1 then we also have to change the variable # $last_nonblank_code_type to be $last_last_nonblank_code_type because # otherwise sub store_token is going to ALSO reduce the comma count. # Alternatively, we can leave the count alone and the # $last_nonblank_code_type alone. Then sub store_token will produce # the correct result. This is simpler and is done here. # Now add a blank space after the comma if appropriate. # Some unusual spacing controls might need another iteration to # reach a final state. if ( $rLL_new->[-1]->[_TYPE_] ne 'b' ) { if ( defined($rblank) ) { $rblank->[_CUMULATIVE_LENGTH_] -= 1; # fix for deleted comma push @{$rLL_new}, $rblank; } } return 1; } ## end sub unstore_last_nonblank_token sub match_trailing_comma_rule { my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_rule, $if_add ) = @_; # Decide if a trailing comma rule is matched. # Input parameter: # $KK = index of closing token in old ($rLL) token list which follows # the location of a possible trailing comma. See diagram below. # $Kfirst = (old) index of first token on the current line of input tokens # $Kp = index of previous nonblank token in new ($rLL_new) array # $trailing_comma_rule = packed user control flags # $if_add = true if adding comma, false if deleting comma # Returns: # false if no match # true if match # For example, we might be checking for addition of a comma here: # bless { # _name => $name, # _price => $price, # _rebate => $rebate <------ location of possible trailing comma # }, $pkg; # ^-------------------closing token at index $KK return unless ($trailing_comma_rule); my ( $trailing_comma_style, $paren_flag ) = @{$trailing_comma_rule}; # List of $trailing_comma_style values: # undef stable: do not change # '0' : no list should have a trailing comma # '1' or '*' : every list should have a trailing comma # 'm' a multi-line list should have a trailing commas # 'b' trailing commas should be 'bare' (comma followed by newline) # 'h' lists of key=>value pairs with a bare trailing comma # 'i' same as s=h but also include any list with no more than about one # comma per line # ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT]. # Note: an interesting generalization would be to let an upper case # letter denote the negation of styles 'm', 'b', 'h', 'i'. This might # be useful for undoing operations. It would be implemented as a wrapper # around this routine. #----------------------------------------- # No style defined : do not add or delete #----------------------------------------- if ( !defined($trailing_comma_style) ) { return !$if_add } #---------------------------------------- # Set some flags describing this location #---------------------------------------- my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; return unless ($type_sequence); my $closing_token = $rLL->[$KK]->[_TOKEN_]; my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence}; return unless ( defined($rtype_count) && $rtype_count->{','} ); my $is_permanently_broken = $self->[_ris_permanently_broken_]->{$type_sequence}; # Note that _ris_broken_container_ also stores the line diff # but it is not available at this early stage. my $K_opening = $self->[_K_opening_container_]->{$type_sequence}; return if ( !defined($K_opening) ); # multiline definition 1: opening and closing tokens on different lines my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_]; my $iline_c = $rLL->[$KK]->[_LINE_INDEX_]; my $line_diff_containers = $iline_c - $iline_o; my $has_multiline_containers = $line_diff_containers > 0; # multiline definition 2: first and last commas on different lines my $iline_first = $self->[_rfirst_comma_line_index_]->{$type_sequence}; my $iline_last = $rLL_new->[$Kp]->[_LINE_INDEX_]; my $has_multiline_commas; my $line_diff_commas = 0; if ( !defined($iline_first) ) { # shouldn't happen if caller checked comma count my $type_kp = $rLL_new->[$Kp]->[_TYPE_]; Fault( "at line $iline_last but line of first comma not defined, at Kp=$Kp, type=$type_kp\n" ) if (DEVEL_MODE); } else { $line_diff_commas = $iline_last - $iline_first; $has_multiline_commas = $line_diff_commas > 0; } # To avoid instability in edge cases, when adding commas we uses the # multiline_commas definition, but when deleting we use multiline # containers. This fixes b1384, b1396, b1397, b1398, b1400. my $is_multiline = $if_add ? $has_multiline_commas : $has_multiline_containers; my $is_bare_multiline_comma = $is_multiline && $KK == $Kfirst; my $match; #---------------------------- # 0 : does not match any list #---------------------------- if ( $trailing_comma_style eq '0' ) { $match = 0; } #------------------------------ # '*' or '1' : matches any list #------------------------------ elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) { $match = 1; } #----------------------------- # 'm' matches a Multiline list #----------------------------- elsif ( $trailing_comma_style eq 'm' ) { $match = $is_multiline; } #---------------------------------- # 'b' matches a Bare trailing comma #---------------------------------- elsif ( $trailing_comma_style eq 'b' ) { $match = $is_bare_multiline_comma; } #-------------------------------------------------------------------------- # 'h' matches a bare hash list with about 1 comma and 1 fat comma per line. # 'i' matches a bare stable list with about 1 comma per line. #-------------------------------------------------------------------------- elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) { # We can treat these together because they are similar. # The set of 'i' matches includes the set of 'h' matches. # the trailing comma must be bare for both 'h' and 'i' return if ( !$is_bare_multiline_comma ); # There must be no more than one comma per line for both 'h' and 'i' # The new_comma_count here will include the trailing comma. my $new_comma_count = $rtype_count->{','}; $new_comma_count += 1 if ($if_add); my $excess_commas = $new_comma_count - $line_diff_commas - 1; if ( $excess_commas > 0 ) { # Exception for a special edge case for option 'i': if the trailing # comma is followed by a blank line or comment, then it cannot be # covered. Then we can safely accept a small list to avoid # instability (issue b1443). if ( $trailing_comma_style eq 'i' && $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1 && $new_comma_count <= 2 ) { $match = 1; } # Patch for instability issue b1456: -boc can trick this test; so # skip it when deleting commas to avoid possible instability # with option 'h' in combination with -atc -dtc -boc; elsif ( $trailing_comma_style eq 'h' # this is a deletion (due to -dtc) && !$if_add # -atc is also set && $rOpts_add_trailing_commas # -boc is set and active && $rOpts_break_at_old_comma_breakpoints && !$rOpts_ignore_old_breakpoints ) { # ignore this test } else { return; } } # a list of key=>value pairs with at least 2 fat commas is a match # for both 'h' and 'i' my $fat_comma_count = $rtype_count->{'=>'}; if ( !$match && $fat_comma_count && $fat_comma_count >= 2 ) { # comma count (including trailer) and fat comma count must differ by # by no more than 1. This allows for some small variations. my $comma_diff = $new_comma_count - $fat_comma_count; $match = ( $comma_diff >= -1 && $comma_diff <= 1 ); } # For 'i' only, a list that can be shown to be stable is a match if ( !$match && $trailing_comma_style eq 'i' ) { $match = ( $is_permanently_broken || ( $rOpts_break_at_old_comma_breakpoints && !$rOpts_ignore_old_breakpoints ) ); } } #------------------------------------------------------------------------- # Unrecognized parameter. This should have been caught in the input check. #------------------------------------------------------------------------- else { DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n"); # do not add or delete return !$if_add; } # Now do any special paren check if ( $match && $paren_flag && $paren_flag ne '1' && $paren_flag ne '*' && $closing_token eq ')' ) { $match &&= $self->match_paren_control_flag( $type_sequence, $paren_flag, $rLL_new ); } # Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas # for use by -vtc logic to avoid instability when -dtc and -atc are both # active. if ($match) { if ( $if_add && $rOpts_delete_trailing_commas || !$if_add && $rOpts_add_trailing_commas ) { $self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1; # The combination of -atc and -dtc and -cab=3 can be unstable # (b1394). So we deactivate -cab=3 in this case. # A value of '0' or '4' is required for stability of case b1451. if ( $rOpts_comma_arrow_breakpoints == 3 ) { $self->[_roverride_cab3_]->{$type_sequence} = 0; } } } return $match; } ## end sub match_trailing_comma_rule sub store_new_token { my ( $self, $type, $token, $Kp ) = @_; # Create and insert a completely new token into the output stream # Input parameters: # $type = the token type # $token = the token text # $Kp = index of the previous token in the new list, $rLL_new # Returns: # $Knew = index in $rLL_new of the new token # This operation is a little tricky because we are creating a new token and # we have to take care to follow the requested whitespace rules. my $Ktop = @{$rLL_new} - 1; my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b'; my $Knew; if ( $top_is_space && $want_left_space{$type} == WS_NO ) { #---------------------------------------------------- # Method 1: Convert the top blank into the new token. #---------------------------------------------------- # Be Careful: we are working on the top of the new stack, on a token # which has been stored. my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE ); $Knew = $Ktop; $rLL_new->[$Knew]->[_TOKEN_] = $token; $rLL_new->[$Knew]->[_TOKEN_LENGTH_] = length($token); $rLL_new->[$Knew]->[_TYPE_] = $type; # NOTE: we are changing the output stack without updating variables # $last_nonblank_code_type, etc. Future needs might require that # those variables be updated here. For now, we just update the # type counts as necessary. if ( $is_counted_type{$type} ) { my $seqno = $seqno_stack{ $depth_next - 1 }; if ($seqno) { $self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++; } } # Then store a new blank $self->store_token($rcopy); } else { #---------------------------------------- # Method 2: Use the normal storage method #---------------------------------------- # Patch for issue c078: keep line indexes in order. If the top # token is a space that we are keeping (due to '-wls=...) then # we have to check that old line indexes stay in order. # In very rare # instances in which side comments have been deleted and converted # into blanks, we may have filtered down multiple blanks into just # one. In that case the top blank may have a higher line number # than the previous nonblank token. Although the line indexes of # blanks are not really significant, we need to keep them in order # in order to pass error checks. if ($top_is_space) { my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_]; my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_]; if ( $new_top_ix < $old_top_ix ) { $rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix; } } my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token ); $self->store_token($rcopy); $Knew = @{$rLL_new} - 1; } return $Knew; } ## end sub store_new_token sub check_Q { # Check that a quote looks okay, and report possible problems # to the logfile. my ( $self, $KK, $Kfirst, $line_number ) = @_; my $token = $rLL->[$KK]->[_TOKEN_]; if ( $token =~ /\t/ ) { $self->note_embedded_tab($line_number); } # The remainder of this routine looks for something like # '$var = s/xxx/yyy/;' # in case it should have been '$var =~ s/xxx/yyy/;' # Start by looking for a token beginning with one of: s y m / tr return unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) } || substr( $token, 0, 2 ) eq 'tr' ); # ... and preceded by one of: = == != my $Kp = $self->K_previous_nonblank( undef, $rLL_new ); return unless ( defined($Kp) ); my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_]; return unless ( $is_unexpected_equals{$previous_nonblank_type} ); my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_]; my $previous_nonblank_type_2 = 'b'; my $previous_nonblank_token_2 = EMPTY_STRING; my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new ); if ( defined($Kpp) ) { $previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_]; $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_]; } my $next_nonblank_token = EMPTY_STRING; my $Kn = $KK + 1; my $Kmax = @{$rLL} - 1; if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 } if ( $Kn <= $Kmax ) { $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_]; } my $token_0 = $rLL->[$Kfirst]->[_TOKEN_]; my $type_0 = $rLL->[$Kfirst]->[_TYPE_]; if ( # preceded by simple scalar $previous_nonblank_type_2 eq 'i' && $previous_nonblank_token_2 =~ /^\$/ # followed by some kind of termination # (but give complaint if we can not see far enough ahead) && $next_nonblank_token =~ /^[; \)\}]$/ # scalar is not declared ## =~ /^(my|our|local)$/ && !( $type_0 eq 'k' && $is_my_our_local{$token_0} ) ) { my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_]; my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~'; complain( "Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n" ); } return; } ## end sub check_Q } ## end closure respace_tokens sub copy_token_as_type { # This provides a quick way to create a new token by # slightly modifying an existing token. my ( $rold_token, $type, $token ) = @_; if ( !defined($token) ) { if ( $type eq 'b' ) { $token = SPACE; } elsif ( $type eq 'q' ) { $token = EMPTY_STRING; } elsif ( $type eq '->' ) { $token = '->'; } elsif ( $type eq ';' ) { $token = ';'; } elsif ( $type eq ',' ) { $token = ','; } else { # Unexpected type ... this sub will work as long as both $token and # $type are defined, but we should catch any unexpected types during # development. if (DEVEL_MODE) { Fault(<<EOM); sub 'copy_token_as_type' received token type '$type' but expects just one of: 'b' 'q' '->' or ';' EOM } # Shouldn't get here $token = $type; } } my @rnew_token = @{$rold_token}; $rnew_token[_TYPE_] = $type; $rnew_token[_TOKEN_] = $token; $rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING; return \@rnew_token; } ## end sub copy_token_as_type sub K_next_code { my ( $self, $KK, $rLL ) = @_; # return the index K of the next nonblank, non-comment token return if ( !defined($KK) ); return if ( $KK < 0 ); # use the standard array unless given otherwise $rLL = $self->[_rLL_] if ( !defined($rLL) ); my $Num = @{$rLL}; my $Knnb = $KK + 1; while ( $Knnb < $Num ) { if ( !defined( $rLL->[$Knnb] ) ) { # We seem to have encountered a gap in our array. # This shouldn't happen because sub write_line() pushed # items into the $rLL array. Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); return; } if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' && $rLL->[$Knnb]->[_TYPE_] ne '#' ) { return $Knnb; } $Knnb++; } return; } ## end sub K_next_code sub K_next_nonblank { my ( $self, $KK, $rLL ) = @_; # return the index K of the next nonblank token, or # return undef if none return if ( !defined($KK) ); return if ( $KK < 0 ); # The third arg allows this routine to be used on any array. This is # useful in sub respace_tokens when we are copying tokens from an old $rLL # to a new $rLL array. But usually the third arg will not be given and we # will just use the $rLL array in $self. $rLL = $self->[_rLL_] if ( !defined($rLL) ); my $Num = @{$rLL}; my $Knnb = $KK + 1; return if ( $Knnb >= $Num ); return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); return if ( ++$Knnb >= $Num ); return $Knnb if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ); # Backup loop. Very unlikely to get here; it means we have neighboring # blanks in the token stream. $Knnb++; while ( $Knnb < $Num ) { # Safety check, this fault shouldn't happen: The $rLL array is the # main array of tokens, so all entries should be used. It is # initialized in sub write_line, and then re-initialized by sub # store_token() within sub respace_tokens. Tokens are pushed on # so there shouldn't be any gaps. if ( !defined( $rLL->[$Knnb] ) ) { Fault("Undefined entry for k=$Knnb") if (DEVEL_MODE); return; } if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb } $Knnb++; } return; } ## end sub K_next_nonblank sub K_previous_code { # return the index K of the previous nonblank, non-comment token # Call with $KK=undef to start search at the top of the array my ( $self, $KK, $rLL ) = @_; # use the standard array unless given otherwise $rLL = $self->[_rLL_] unless ( defined($rLL) ); my $Num = @{$rLL}; if ( !defined($KK) ) { $KK = $Num } if ( $KK > $Num ) { # This fault can be caused by a programming error in which a bad $KK is # given. The caller should make the first call with KK_new=undef to # avoid this error. Fault( "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" ) if (DEVEL_MODE); return; } my $Kpnb = $KK - 1; while ( $Kpnb >= 0 ) { if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' && $rLL->[$Kpnb]->[_TYPE_] ne '#' ) { return $Kpnb; } $Kpnb--; } return; } ## end sub K_previous_code sub K_previous_nonblank { # return index of previous nonblank token before item K; # Call with $KK=undef to start search at the top of the array my ( $self, $KK, $rLL ) = @_; # use the standard array unless given otherwise $rLL = $self->[_rLL_] unless ( defined($rLL) ); my $Num = @{$rLL}; if ( !defined($KK) ) { $KK = $Num } if ( $KK > $Num ) { # This fault can be caused by a programming error in which a bad $KK is # given. The caller should make the first call with KK_new=undef to # avoid this error. Fault( "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num" ) if (DEVEL_MODE); return; } my $Kpnb = $KK - 1; return if ( $Kpnb < 0 ); return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); return if ( --$Kpnb < 0 ); return $Kpnb if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ); # Backup loop. We should not get here unless some routine # slipped repeated blanks into the token stream. return if ( --$Kpnb < 0 ); while ( $Kpnb >= 0 ) { if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb } $Kpnb--; } return; } ## end sub K_previous_nonblank sub parent_seqno_by_K { # Return the sequence number of the parent container of token K, if any. my ( $self, $KK ) = @_; my $rLL = $self->[_rLL_]; # The task is to jump forward to the next container token # and use the sequence number of either it or its parent. # For example, consider the following with seqno=5 of the '[' and ']' # being called with index K of the first token of each line: # # result # push @tests, # - # [ # - # sub { 99 }, 'do {&{%s} for 1,2}', # 5 # '(&{})(&{})', undef, # 5 # [ 2, 2, 0 ], 0 # 5 # ]; # - # NOTE: The ending parent will be SEQ_ROOT for a balanced file. For # unbalanced files, last sequence number will either be undefined or it may # be at a deeper level. In either case we will just return SEQ_ROOT to # have a defined value and allow formatting to proceed. my $parent_seqno = SEQ_ROOT; my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_]; if ($type_sequence) { $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; } else { my $Kt = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; if ( defined($Kt) ) { $type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; my $type = $rLL->[$Kt]->[_TYPE_]; # if next container token is closing, it is the parent seqno if ( $is_closing_type{$type} ) { $parent_seqno = $type_sequence; } # otherwise we want its parent container else { $parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence}; } } } $parent_seqno = SEQ_ROOT if ( !defined($parent_seqno) ); return $parent_seqno; } ## end sub parent_seqno_by_K sub is_in_block_by_i { my ( $self, $i ) = @_; # returns true if # token at i is contained in a BLOCK # or is at root level # or there is some kind of error (i.e. unbalanced file) # returns false otherwise if ( $i < 0 ) { DEVEL_MODE && Fault("Bad call, i='$i'\n"); return 1; } my $seqno = $parent_seqno_to_go[$i]; return 1 if ( !$seqno || $seqno eq SEQ_ROOT ); return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} ); return; } ## end sub is_in_block_by_i sub is_in_list_by_i { my ( $self, $i ) = @_; # returns true if token at i is contained in a LIST # returns false otherwise my $seqno = $parent_seqno_to_go[$i]; return if ( !$seqno ); return if ( $seqno eq SEQ_ROOT ); if ( $self->[_ris_list_by_seqno_]->{$seqno} ) { return 1; } return; } ## end sub is_in_list_by_i sub is_list_by_K { # Return true if token K is in a list my ( $self, $KK ) = @_; my $parent_seqno = $self->parent_seqno_by_K($KK); return unless defined($parent_seqno); return $self->[_ris_list_by_seqno_]->{$parent_seqno}; } ## end sub is_list_by_K sub is_list_by_seqno { # Return true if the immediate contents of a container appears to be a # list. my ( $self, $seqno ) = @_; return unless defined($seqno); return $self->[_ris_list_by_seqno_]->{$seqno}; } ## end sub is_list_by_seqno sub resync_lines_and_tokens { my $self = shift; # Re-construct the arrays of tokens associated with the original input # lines since they have probably changed due to inserting and deleting # blanks and a few other tokens. # Return parameters: # set severe_error = true if processing needs to terminate my $severe_error; my $rqw_lines = []; my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; my $rlines = $self->[_rlines_]; my @Krange_code_without_comments; my @Klast_valign_code; # This is the next token and its line index: my $Knext = 0; my $Kmax = defined($Klimit) ? $Klimit : -1; # Verify that old line indexes are in still order. If this error occurs, # check locations where sub 'respace_tokens' creates new tokens (like # blank spaces). It must have set a bad old line index. if ( DEVEL_MODE && defined($Klimit) ) { my $iline = $rLL->[0]->[_LINE_INDEX_]; foreach my $KK ( 1 .. $Klimit ) { my $iline_last = $iline; $iline = $rLL->[$KK]->[_LINE_INDEX_]; if ( $iline < $iline_last ) { my $KK_m = $KK - 1; my $token_m = $rLL->[$KK_m]->[_TOKEN_]; my $token = $rLL->[$KK]->[_TOKEN_]; my $type_m = $rLL->[$KK_m]->[_TYPE_]; my $type = $rLL->[$KK]->[_TYPE_]; Fault(<<EOM); Line indexes out of order at index K=$KK: at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m' at KK =$KK: old line=$iline, type='$type', token='$token', EOM } } } my $iline = -1; foreach my $line_of_tokens ( @{$rlines} ) { $iline++; my $line_type = $line_of_tokens->{_line_type}; if ( $line_type eq 'CODE' ) { # Get the old number of tokens on this line my $rK_range_old = $line_of_tokens->{_rK_range}; my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old}; my $Kdiff_old = 0; if ( defined($Kfirst_old) ) { $Kdiff_old = $Klast_old - $Kfirst_old; } # Find the range of NEW K indexes for the line: # $Kfirst = index of first token on line # $Klast = index of last token on line my ( $Kfirst, $Klast ); my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens # Optimization: Although the actual K indexes may be completely # changed after respacing, the number of tokens on any given line # will often be nearly unchanged. So we will see if we can start # our search by guessing that the new line has the same number # of tokens as the old line. my $Knext_guess = $Knext + $Kdiff_old; if ( $Knext_guess > $Knext && $Knext_guess < $Kmax && $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline ) { # the guess is good, so we can start our search here $Knext = $Knext_guess + 1; } while ($Knext <= $Kmax && $rLL->[$Knext]->[_LINE_INDEX_] <= $iline ) { $Knext++; } if ( $Knext > $Knext_beg ) { $Klast = $Knext - 1; # Delete any terminal blank token if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 } if ( $Klast < $Knext_beg ) { $Klast = undef; } else { $Kfirst = $Knext_beg; # Save ranges of non-comment code. This will be used by # sub keep_old_line_breaks. if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) { push @Krange_code_without_comments, [ $Kfirst, $Klast ]; } # Only save ending K indexes of code types which are blank # or 'VER'. These will be used for a convergence check. # See related code in sub 'convey_batch_to_vertical_aligner' my $CODE_type = $line_of_tokens->{_code_type}; if ( !$CODE_type || $CODE_type eq 'VER' ) { push @Klast_valign_code, $Klast; } } } # It is only safe to trim the actual line text if the input # line had a terminal blank token. Otherwise, we may be # in a quote. if ( $line_of_tokens->{_ended_in_blank_token} ) { $line_of_tokens->{_line_text} =~ s/\s+$//; } $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ]; # Deleting semicolons can create new empty code lines # which should be marked as blank if ( !defined($Kfirst) ) { my $CODE_type = $line_of_tokens->{_code_type}; if ( !$CODE_type ) { $line_of_tokens->{_code_type} = 'BL'; } } else { #--------------------------------------------------- # save indexes of all lines with a 'q' at either end # for later use by sub find_multiline_qw #--------------------------------------------------- if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q' || $rLL->[$Klast]->[_TYPE_] eq 'q' ) { push @{$rqw_lines}, $iline; } } } } # There shouldn't be any nodes beyond the last one. This routine is # relinking lines and tokens after the tokens have been respaced. A fault # here indicates some kind of bug has been introduced into the above loops. # There is not good way to keep going; we better stop here. if ( $Knext <= $Kmax ) { Fault_Warn( "unexpected tokens at end of file when reconstructing lines"); $severe_error = 1; return ( $severe_error, $rqw_lines ); } $self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments; # Setup the convergence test in the FileWriter based on line-ending indexes my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->setup_convergence_test( \@Klast_valign_code ); return ( $severe_error, $rqw_lines ); } ## end sub resync_lines_and_tokens sub check_for_old_break { my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_; # This sub is called to help implement flags: # --keep-old-breakpoints-before and --keep-old-breakpoints-after # Given: # $KK = index of a token, # $rkeep_break_hash = user control for --keep-old-... # $rbreak_hash = hash of tokens where breaks are requested # Set $rbreak_hash as follows if a user break is requested: # = 1 make a hard break (flush the current batch) # best for something like leading commas (-kbb=',') # = 2 make a soft break (keep building current batch) # best for something like leading -> my $rLL = $self->[_rLL_]; my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; # non-container tokens use the type as the key if ( !$seqno ) { my $type = $rLL->[$KK]->[_TYPE_]; if ( $rkeep_break_hash->{$type} ) { $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1; } } # container tokens use the token as the key else { my $token = $rLL->[$KK]->[_TOKEN_]; my $flag = $rkeep_break_hash->{$token}; if ($flag) { my $match = $flag eq '1' || $flag eq '*'; # check for special matching codes if ( !$match ) { if ( $token eq '(' || $token eq ')' ) { $match = $self->match_paren_control_flag( $seqno, $flag ); } elsif ( $token eq '{' || $token eq '}' ) { # These tentative codes 'b' and 'B' for brace types are # placeholders for possible future brace types. They # are not documented and may be changed. my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno}; if ( $flag eq 'b' ) { $match = $block_type } elsif ( $flag eq 'B' ) { $match = !$block_type } else { # unknown code - no match } } else { ## ok: none of the above } } if ($match) { my $type = $rLL->[$KK]->[_TYPE_]; $rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1; } } } return; } ## end sub check_for_old_break sub keep_old_line_breaks { # Called once per file to find and mark any old line breaks which # should be kept. We will be translating the input hashes into # token indexes. # A flag is set as follows: # = 1 make a hard break (flush the current batch) # best for something like leading commas (-kbb=',') # = 2 make a soft break (keep building current batch) # best for something like leading -> my ($self) = @_; my $rLL = $self->[_rLL_]; my $rKrange_code_without_comments = $self->[_rKrange_code_without_comments_]; my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_]; my $rbreak_after_Klast = $self->[_rbreak_after_Klast_]; my $rbreak_container = $self->[_rbreak_container_]; #---------------------------------------- # Apply --break-at-old-method-breakpoints #---------------------------------------- # This code moved here from sub break_lists to fix b1120 if ( $rOpts->{'break-at-old-method-breakpoints'} ) { foreach my $item ( @{$rKrange_code_without_comments} ) { my ( $Kfirst, $Klast ) = @{$item}; my $type = $rLL->[$Kfirst]->[_TYPE_]; my $token = $rLL->[$Kfirst]->[_TOKEN_]; # leading '->' use a value of 2 which causes a soft # break rather than a hard break if ( $type eq '->' ) { $rbreak_before_Kfirst->{$Kfirst} = 2; } # leading ')->' use a special flag to insure that both # opening and closing parens get opened # Fix for b1120: only for parens, not braces elsif ( $token eq ')' ) { my $Kn = $self->K_next_nonblank($Kfirst); next if ( !defined($Kn) ); next if ( $Kn > $Klast ); next if ( $rLL->[$Kn]->[_TYPE_] ne '->' ); my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_]; next if ( !$seqno ); # Note: in previous versions there was a fix here to avoid # instability between conflicting -bom and -pvt or -pvtc flags. # The fix skipped -bom for a small line difference. But this # was troublesome, and instead the fix has been moved to # sub set_vertical_tightness_flags where priority is given to # the -bom flag over -pvt and -pvtc flags. Both opening and # closing paren flags are involved because even though -bom only # requests breaking before the closing paren, automated logic # opens the opening paren when the closing paren opens. # Relevant cases are b977, b1215, b1270, b1303 $rbreak_container->{$seqno} = 1; } else { ## ok: not a special case } } } #--------------------------------------------------------------------- # Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after #--------------------------------------------------------------------- return unless ( %keep_break_before_type || %keep_break_after_type ); foreach my $item ( @{$rKrange_code_without_comments} ) { my ( $Kfirst, $Klast ) = @{$item}; $self->check_for_old_break( $Kfirst, \%keep_break_before_type, $rbreak_before_Kfirst ); $self->check_for_old_break( $Klast, \%keep_break_after_type, $rbreak_after_Klast ); } return; } ## end sub keep_old_line_breaks sub weld_containers { # Called once per file to do any welding operations requested by --weld* # flags. my ($self) = @_; # This count is used to eliminate needless calls for weld checks elsewhere $total_weld_count = 0; return if ( $rOpts->{'indent-only'} ); return unless ($rOpts_add_newlines); # Important: sub 'weld_cuddled_blocks' must be called before # sub 'weld_nested_containers'. This is because the cuddled option needs to # use the original _LEVEL_ values of containers, but the weld nested # containers changes _LEVEL_ of welded containers. # Here is a good test case to be sure that both cuddling and welding # are working and not interfering with each other: <<snippets/ce_wn1.in>> # perltidy -wn -ce # if ($BOLD_MATH) { ( # $labels, $comment, # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' ) # ) } else { ( # &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ), # $after # ) } $self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} ); if ( $rOpts->{'weld-nested-containers'} ) { $self->weld_nested_containers(); $self->weld_nested_quotes(); } #------------------------------------------------------------- # All welding is done. Finish setting up weld data structures. #------------------------------------------------------------- my $rLL = $self->[_rLL_]; my $rK_weld_left = $self->[_rK_weld_left_]; my $rK_weld_right = $self->[_rK_weld_right_]; my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_]; my @K_multi_weld; my @keys = keys %{$rK_weld_right}; $total_weld_count = @keys; # First pass to process binary welds. # This loop is processed in unsorted order for efficiency. foreach my $Kstart (@keys) { my $Kend = $rK_weld_right->{$Kstart}; # An error here would be due to an incorrect initialization introduced # in one of the above weld routines, like sub weld_nested. if ( $Kend <= $Kstart ) { Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n") if (DEVEL_MODE); next; } # Set weld values for all tokens this welded pair foreach ( $Kstart + 1 .. $Kend ) { $rK_weld_left->{$_} = $Kstart; } foreach my $Kx ( $Kstart .. $Kend - 1 ) { $rK_weld_right->{$Kx} = $Kend; $rweld_len_right_at_K->{$Kx} = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; } # Remember the leftmost index of welds which continue to the right if ( defined( $rK_weld_right->{$Kend} ) && !defined( $rK_weld_left->{$Kstart} ) ) { push @K_multi_weld, $Kstart; } } # Second pass to process chains of welds (these are rare). # This has to be processed in sorted order. if (@K_multi_weld) { my $Kend = -1; foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) { # Skip any interior K which was originally missing a left link next if ( $Kstart <= $Kend ); # Find the end of this chain $Kend = $rK_weld_right->{$Kstart}; my $Knext = $rK_weld_right->{$Kend}; while ( defined($Knext) ) { $Kend = $Knext; $Knext = $rK_weld_right->{$Kend}; } # Set weld values this chain foreach ( $Kstart + 1 .. $Kend ) { $rK_weld_left->{$_} = $Kstart; } foreach my $Kx ( $Kstart .. $Kend - 1 ) { $rK_weld_right->{$Kx} = $Kend; $rweld_len_right_at_K->{$Kx} = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $rLL->[$Kx]->[_CUMULATIVE_LENGTH_]; } } } return; } ## end sub weld_containers sub cumulative_length_before_K { my ( $self, $KK ) = @_; # Returns the cumulative character length from the first token to # token before the token at index $KK. my $rLL = $self->[_rLL_]; return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; } sub weld_cuddled_blocks { my ($self) = @_; # Called once per file to handle cuddled formatting my $rK_weld_left = $self->[_rK_weld_left_]; my $rK_weld_right = $self->[_rK_weld_right_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; # This routine implements the -cb flag by finding the appropriate # closing and opening block braces and welding them together. return unless ( %{$rcuddled_block_types} ); my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $rbreak_container = $self->[_rbreak_container_]; my $ris_broken_container = $self->[_ris_broken_container_]; my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_]; my $K_closing_container = $self->[_K_closing_container_]; # A stack to remember open chains at all levels: This is a hash rather than # an array for safety because negative levels can occur in files with # errors. This allows us to keep processing with negative levels. # $in_chain{$level} = [$chain_type, $type_sequence]; my %in_chain; my $CBO = $rOpts->{'cuddled-break-option'}; # loop over structure items to find cuddled pairs my $level = 0; my $KNEXT = $self->[_K_first_seq_item_]; while ( defined($KNEXT) ) { my $KK = $KNEXT; $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $rtoken_vars = $rLL->[$KK]; my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; if ( !$type_sequence ) { next if ( $KK == 0 ); # first token in file may not be container # A fault here implies that an error was made in the little loop at # the bottom of sub 'respace_tokens' which set the values of # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the # loop control lines above. Fault("sequence = $type_sequence not defined at K=$KK") if (DEVEL_MODE); next; } # NOTE: we must use the original levels here. They can get changed # by sub 'weld_nested_containers', so this routine must be called # before sub 'weld_nested_containers'. my $last_level = $level; $level = $rtoken_vars->[_LEVEL_]; if ( $level < $last_level ) { $in_chain{$last_level} = undef } elsif ( $level > $last_level ) { $in_chain{$level} = undef } else { ## ok - ($level == $last_level) } # We are only looking at code blocks my $token = $rtoken_vars->[_TOKEN_]; my $type = $rtoken_vars->[_TYPE_]; next unless ( $type eq $token ); if ( $token eq '{' ) { my $block_type = $rblock_type_of_seqno->{$type_sequence}; if ( !$block_type ) { # patch for unrecognized block types which may not be labeled my $Kp = $self->K_previous_nonblank($KK); while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) { $Kp = $self->K_previous_nonblank($Kp); } next unless $Kp; $block_type = $rLL->[$Kp]->[_TOKEN_]; } if ( $in_chain{$level} ) { # we are in a chain and are at an opening block brace. # See if we are welding this opening brace with the previous # block brace. Get their identification numbers: my $closing_seqno = $in_chain{$level}->[1]; my $opening_seqno = $type_sequence; # The preceding block must be on multiple lines so that its # closing brace will start a new line. if ( !$ris_broken_container->{$closing_seqno} && !$rbreak_container->{$closing_seqno} ) { next unless ( $CBO == 2 ); $rbreak_container->{$closing_seqno} = 1; } # We can weld the closing brace to its following word .. my $Ko = $K_closing_container->{$closing_seqno}; my $Kon; if ( defined($Ko) ) { $Kon = $self->K_next_nonblank($Ko); } # ..unless it is a comment if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) { # OK to weld these two tokens... $rK_weld_right->{$Ko} = $Kon; $rK_weld_left->{$Kon} = $Ko; # Set flag that we want to break the next container # so that the cuddled line is balanced. $rbreak_container->{$opening_seqno} = 1 if ($CBO); # Remember which braces are cuddled. # The closing brace is used to set adjusted indentations. # The opening brace is not yet used but might eventually # be needed in setting adjusted indentation. $ris_cuddled_closing_brace->{$closing_seqno} = 1; } } else { # We are not in a chain. Start a new chain if we see the # starting block type. if ( $rcuddled_block_types->{$block_type} ) { $in_chain{$level} = [ $block_type, $type_sequence ]; } else { $block_type = '*'; $in_chain{$level} = [ $block_type, $type_sequence ]; } } } elsif ( $token eq '}' ) { if ( $in_chain{$level} ) { # We are in a chain at a closing brace. See if this chain # continues.. my $Knn = $self->K_next_code($KK); next unless $Knn; my $chain_type = $in_chain{$level}->[0]; my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_]; if ( $rcuddled_block_types->{$chain_type}->{$next_nonblank_token} ) { # Note that we do not weld yet because we must wait until # we we are sure that an opening brace for this follows. $in_chain{$level}->[1] = $type_sequence; } else { $in_chain{$level} = undef } } } else { ## ok - not a curly brace } } return; } ## end sub weld_cuddled_blocks sub find_nested_pairs { my $self = shift; # This routine is called once per file to do preliminary work needed for # the --weld-nested option. This information is also needed for adding # semicolons. my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $Num = @{$rLL}; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; # We define an array of pairs of nested containers my @nested_pairs; # Names of calling routines can either be marked as 'i' or 'w', # and they may invoke a sub call with an '->'. We will consider # any consecutive string of such types as a single unit when making # weld decisions. We also allow a leading ! my $is_name_type = { 'i' => 1, 'w' => 1, 'U' => 1, '->' => 1, '!' => 1, }; # Loop over all closing container tokens foreach my $inner_seqno ( keys %{$K_closing_container} ) { my $K_inner_closing = $K_closing_container->{$inner_seqno}; # See if it is immediately followed by another, outer closing token my $K_outer_closing = $K_inner_closing + 1; $K_outer_closing += 1 if ( $K_outer_closing < $Num && $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' ); next if ( $K_outer_closing >= $Num ); my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_]; next if ( !$outer_seqno ); my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_]; next if ( !$is_closing_token{$token_outer_closing} ); # Simple filter: No commas or semicolons in the outer container my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno}; if ($rtype_count) { next if ( $rtype_count->{','} || $rtype_count->{';'} ); } # Now we have to check the opening tokens. my $K_outer_opening = $K_opening_container->{$outer_seqno}; my $K_inner_opening = $K_opening_container->{$inner_seqno}; next if ( !defined($K_outer_opening) ); next if ( !defined($K_inner_opening) ); my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno}; my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno}; # Verify that the inner opening token is the next container after the # outer opening token. my $K_io_check = $rLL->[$K_outer_opening]->[_KNEXT_SEQ_ITEM_]; next unless defined($K_io_check); if ( $K_io_check != $K_inner_opening ) { # The inner opening container does not immediately follow the outer # opening container, but we may still allow a weld if they are # separated by a sub signature. For example, we may have something # like this, where $K_io_check may be at the first 'x' instead of # 'io'. So we need to hop over the signature and see if we arrive # at 'io'. # oo io # | x x | # $obj->then( sub ( $code ) { # ... # return $c->render(text => '', status => $code); # } ); # | | # ic oc next if ( !$inner_blocktype || $inner_blocktype ne 'sub' ); next if $rLL->[$K_io_check]->[_TOKEN_] ne '('; my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_]; next unless defined($seqno_signature); my $K_signature_closing = $K_closing_container->{$seqno_signature}; next unless defined($K_signature_closing); my $K_test = $rLL->[$K_signature_closing]->[_KNEXT_SEQ_ITEM_]; next unless ( defined($K_test) && $K_test == $K_inner_opening ); # OK, we have arrived at 'io' in the above diagram. We should put # a limit on the length or complexity of the signature here. There # is no perfect way to do this, one way is to put a limit on token # count. For consistency with older versions, we should allow a # signature with a single variable to weld, but not with # multiple variables. A single variable as in 'sub ($code) {' can # have a $Kdiff of 2 to 4, depending on spacing. # But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to # 7, depending on spacing. So to keep formatting consistent with # previous versions, we will also avoid welding if there is a comma # in the signature. my $Kdiff = $K_signature_closing - $K_io_check; next if ( $Kdiff > 4 ); # backup comma count test; but we cannot get here with Kdiff<=4 my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature}; next if ( $rtc && $rtc->{','} ); } # Yes .. this is a possible nesting pair. # They can be separated by a small amount. my $K_diff = $K_inner_opening - $K_outer_opening; # Count the number of nonblank characters separating them. # Note: the $nonblank_count includes the inner opening container # but not the outer opening container, so it will be >= 1. if ( $K_diff < 0 ) { next } # Shouldn't happen my $nonblank_count = 0; my $type; my $is_name; # Here is an example of a long identifier chain which counts as a # single nonblank here (this spans about 10 K indexes): # if ( !Boucherot::SetOfConnections->new->handler->execute( # ^--K_o_o ^--K_i_o # @array) ) my $Kn_first = $K_outer_opening; my $Kn_last_nonblank; my $saw_comment; foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) { next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' ); if ( !$nonblank_count ) { $Kn_first = $Kn } if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; } $Kn_last_nonblank = $Kn; # skip chain of identifier tokens my $last_type = $type; my $last_is_name = $is_name; $type = $rLL->[$Kn]->[_TYPE_]; if ( $type eq '#' ) { $saw_comment = 1; last } $is_name = $is_name_type->{$type}; next if ( $is_name && $last_is_name ); # do not count a possible leading - of bareword hash key next if ( $type eq 'm' && !$last_type ); $nonblank_count++; last if ( $nonblank_count > 2 ); } # Do not weld across a comment .. fix for c058. next if ($saw_comment); # Patch for b1104: do not weld to a paren preceded by sort/map/grep # because the special line break rules may cause a blinking state if ( defined($Kn_last_nonblank) && $rLL->[$K_inner_opening]->[_TOKEN_] eq '(' && $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' ) { my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_]; # Turn off welding at sort/map/grep ( if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 } } my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_]; if ( # 1: adjacent opening containers, like: do {{ $nonblank_count == 1 # 2. anonymous sub + prototype or sig: )->then( sub ($code) { # ... but it seems best not to stack two structural blocks, like # this # sub make_anon_with_my_sub { sub { # because it probably hides the structure a little too much. || ( $inner_blocktype && $inner_blocktype eq 'sub' && $rLL->[$Kn_first]->[_TOKEN_] eq 'sub' && !$outer_blocktype ) # 3. short item following opening paren, like: fun( yyy ( || $nonblank_count == 2 && $token_oo eq '(' # 4. weld around fat commas, if requested (git #108), such as # elf->call_method( method_name_foo => { || ( $type eq '=>' && $nonblank_count <= 3 && %weld_fat_comma_rules && $weld_fat_comma_rules{$token_oo} ) ) { push @nested_pairs, [ $inner_seqno, $outer_seqno, $K_inner_closing ]; } next; } # The weld routine expects the pairs in order in the form # [$seqno_inner, $seqno_outer] # And they must be in the same order as the inner closing tokens # (otherwise, welds of three or more adjacent tokens will not work). The K # value of this inner closing token has temporarily been stored for # sorting. @nested_pairs = # Drop the K index after sorting (it would cause trouble downstream) map { [ $_->[0], $_->[1] ] } # Sort on the K values sort { $a->[2] <=> $b->[2] } @nested_pairs; return \@nested_pairs; } ## end sub find_nested_pairs sub match_paren_control_flag { # Decide if this paren is excluded by user request: # undef matches no parens # '*' matches all parens # 'k' matches only if the previous nonblank token is a perl builtin # keyword (such as 'if', 'while'), # 'K' matches if 'k' does not, meaning if the previous token is not a # keyword. # 'f' matches if the previous token is a function other than a keyword. # 'F' matches if 'f' does not. # 'w' matches if either 'k' or 'f' match. # 'W' matches if 'w' does not. my ( $self, $seqno, $flag, $rLL ) = @_; # Input parameters: # $seqno = sequence number of the container (should be paren) # $flag = the flag which defines what matches # $rLL = an optional alternate token list needed for respace operations $rLL = $self->[_rLL_] unless ( defined($rLL) ); return 0 unless ( defined($flag) ); return 0 if $flag eq '0'; return 1 if $flag eq '1'; return 1 if $flag eq '*'; return 0 unless ($seqno); my $K_opening = $self->[_K_opening_container_]->{$seqno}; return unless ( defined($K_opening) ); my ( $is_f, $is_k, $is_w ); my $Kp = $self->K_previous_nonblank( $K_opening, $rLL ); if ( defined($Kp) ) { my $type_p = $rLL->[$Kp]->[_TYPE_]; # keyword? $is_k = $type_p eq 'k'; # function call? $is_f = $self->[_ris_function_call_paren_]->{$seqno}; # either keyword or function call? $is_w = $is_k || $is_f; } my $match; if ( $flag eq 'k' ) { $match = $is_k } elsif ( $flag eq 'K' ) { $match = !$is_k } elsif ( $flag eq 'f' ) { $match = $is_f } elsif ( $flag eq 'F' ) { $match = !$is_f } elsif ( $flag eq 'w' ) { $match = $is_w } elsif ( $flag eq 'W' ) { $match = !$is_w } else { ## no match } return $match; } ## end sub match_paren_control_flag sub is_excluded_weld { # decide if this weld is excluded by user request my ( $self, $KK, $is_leading ) = @_; my $rLL = $self->[_rLL_]; my $rtoken_vars = $rLL->[$KK]; my $token = $rtoken_vars->[_TOKEN_]; my $rflags = $weld_nested_exclusion_rules{$token}; return 0 unless ( defined($rflags) ); my $flag = $is_leading ? $rflags->[0] : $rflags->[1]; return 0 unless ( defined($flag) ); return 1 if $flag eq '*'; my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; return $self->match_paren_control_flag( $seqno, $flag ); } ## end sub is_excluded_weld # hashes to simplify welding logic my %type_ok_after_bareword; my %has_tight_paren; BEGIN { # types needed for welding RULE 6 my @q = qw# => -> { ( [ #; @type_ok_after_bareword{@q} = (1) x scalar(@q); # these types do not 'like' to be separated from a following paren @q = qw(w i q Q G C Z U); @{has_tight_paren}{@q} = (1) x scalar(@q); } ## end BEGIN use constant DEBUG_WELD => 0; sub setup_new_weld_measurements { # Define quantities to check for excess line lengths when welded. # Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes' my ( $self, $Kouter_opening, $Kinner_opening ) = @_; # Given indexes of outer and inner opening containers to be welded: # $Kouter_opening, $Kinner_opening # Returns these variables: # $new_weld_ok = true (new weld ok) or false (do not start new weld) # $starting_indent = starting indentation # $starting_lentot = starting cumulative length # $msg = diagnostic message for debugging my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $starting_level; my $starting_ci; my $starting_lentot; my $maximum_text_length; my $msg = EMPTY_STRING; my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; my $rK_range = $rlines->[$iline_oo]->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; #------------------------------------------------------------------------- # We now define a reference index, '$Kref', from which to start measuring # This choice turns out to be critical for keeping welds stable during # iterations, so we go through a number of STEPS... #------------------------------------------------------------------------- # STEP 1: Our starting guess is to use measure from the first token of the # current line. This is usually a good guess. my $Kref = $Kfirst; # STEP 2: See if we should go back a little farther my $Kprev = $self->K_previous_nonblank($Kfirst); if ( defined($Kprev) ) { # Avoid measuring from between an opening paren and a previous token # which should stay close to it ... fixes b1185 my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_]; my $type_prev = $rLL->[$Kprev]->[_TYPE_]; if ( $Kouter_opening == $Kfirst && $token_oo eq '(' && $has_tight_paren{$type_prev} ) { $Kref = $Kprev; } # Back up and count length from a token like '=' or '=>' if -lp # is used (this fixes b520) # ...or if a break is wanted before there elsif ($rOpts_line_up_parentheses || $want_break_before{$type_prev} ) { # If there are other sequence items between the start of this line # and the opening token in question, then do not include tokens on # the previous line in length calculations. This check added to # fix case b1174 which had a '?' on the line my $no_previous_seq_item = $Kref == $Kouter_opening || $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_] == $Kouter_opening; if ( $no_previous_seq_item && substr( $type_prev, 0, 1 ) eq '=' ) { $Kref = $Kprev; # Fix for b1144 and b1112: backup to the first nonblank # character before the =>, or to the start of its line. if ( $type_prev eq '=>' ) { my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_]; my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range}; my ( $Kfirst_prev, $Klast_prev ) = @{$rK_range_prev}; foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) { next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); $Kref = $KK; last; } } } } else { ## ok } } # STEP 3: Now look ahead for a ternary and, if found, use it. # This fixes case b1182. # Also look for a ')' at the same level and, if found, use it. # This fixes case b1224. if ( $Kref < $Kouter_opening ) { my $Knext = $rLL->[$Kref]->[_KNEXT_SEQ_ITEM_]; my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; while ( $Knext < $Kouter_opening ) { if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) { if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] } || $rLL->[$Knext]->[_TOKEN_] eq ')' ) { $Kref = $Knext; last; } } $Knext = $rLL->[$Knext]->[_KNEXT_SEQ_ITEM_]; } } # Define the starting measurements we will need $starting_lentot = $Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_]; $starting_level = $rLL->[$Kref]->[_LEVEL_]; $starting_ci = $rLL->[$Kref]->[_CI_LEVEL_]; $maximum_text_length = $maximum_text_length_at_level[$starting_level] - $starting_ci * $rOpts_continuation_indentation; # STEP 4: Switch to using the outer opening token as the reference # point if a line break before it would make a longer line. # Fixes case b1055 and is also an alternate fix for b1065. my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; if ( $Kref < $Kouter_opening ) { my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_]; my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_]; my $maximum_text_length_oo = $maximum_text_length_at_level[$starting_level_oo] - $starting_ci_oo * $rOpts_continuation_indentation; # The excess length to any cumulative length K = lenK is either # $excess = $lenk - ($lentot + $maximum_text_length), or # $excess = $lenk - ($lentot_oo + $maximum_text_length_oo), # so the worst case (maximum excess) corresponds to the configuration # with minimum value of the sum: $lentot + $maximum_text_length if ( $lentot_oo + $maximum_text_length_oo < $starting_lentot + $maximum_text_length ) { $Kref = $Kouter_opening; $starting_level = $starting_level_oo; $starting_ci = $starting_ci_oo; $starting_lentot = $lentot_oo; $maximum_text_length = $maximum_text_length_oo; } } my $new_weld_ok = 1; # STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The # combination -wn -lp -dws -naws does not work well and can cause blinkers. # It will probably only occur in stress testing. For this situation we # will only start a new weld if we start at a 'good' location. # - Added 'if' to fix case b1032. # - Require blank before certain previous characters to fix b1111. # - Add ';' to fix case b1139 # - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162. # - relaxed constraints for b1227 # - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353 # - added skip if type is 'Q' for b1447 if ( $starting_ci && $rOpts_line_up_parentheses && $rOpts_delete_old_whitespace && !$rOpts_add_whitespace && $rLL->[$Kinner_opening]->[_TYPE_] ne 'q' && $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q' && defined($Kprev) ) { my $type_first = $rLL->[$Kfirst]->[_TYPE_]; my $token_first = $rLL->[$Kfirst]->[_TOKEN_]; my $type_prev = $rLL->[$Kprev]->[_TYPE_]; my $type_pp = 'b'; if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] } my $is_good_location = $type_prev =~ /^[\,\.\;]/ || ( $type_prev =~ /^[=\{\[\(\L]/ && ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) ) || $type_first =~ /^[=\,\.\;\{\[\(\L]/ || $type_first eq '||' || ( $type_first eq 'k' && ( $token_first eq 'if' || $token_first eq 'or' ) ); if ( !$is_good_location ) { $msg = "Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n"; $new_weld_ok = 0; } } return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg ); } ## end sub setup_new_weld_measurements sub excess_line_length_for_Krange { my ( $self, $Kfirst, $Klast ) = @_; # returns $excess_length = # by how many characters a line composed of tokens $Kfirst .. $Klast will # exceed the allowed line length my $rLL = $self->[_rLL_]; my $length_before_Kfirst = $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_]; # backup before a side comment if necessary my $Kend = $Klast; if ( $rOpts_ignore_side_comment_lengths && $rLL->[$Klast]->[_TYPE_] eq '#' ) { my $Kprev = $self->K_previous_nonblank($Klast); if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev } } # get the length of the text my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst; # get the size of the text window my $level = $rLL->[$Kfirst]->[_LEVEL_]; my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_]; my $max_text_length = $maximum_text_length_at_level[$level] - $ci_level * $rOpts_continuation_indentation; my $excess_length = $length - $max_text_length; DEBUG_WELD && print "Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n"; return ($excess_length); } ## end sub excess_line_length_for_Krange sub weld_nested_containers { my ($self) = @_; # Called once per file for option '--weld-nested-containers' my $rK_weld_left = $self->[_rK_weld_left_]; my $rK_weld_right = $self->[_rK_weld_right_]; # This routine implements the -wn flag by "welding together" # the nested closing and opening tokens which were previously # identified by sub 'find_nested_pairs'. "welding" simply # involves setting certain hash values which will be checked # later during formatting. my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $ris_asub_block = $self->[_ris_asub_block_]; my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_]; my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; # Find nested pairs of container tokens for any welding. my $rnested_pairs = $self->find_nested_pairs(); # Return unless there are nested pairs to weld return unless defined($rnested_pairs) && @{$rnested_pairs}; # NOTE: It would be nice to apply RULE 5 right here by deleting unwanted # pairs. But it isn't clear if this is possible because we don't know # which sequences might actually start a weld. my $rOpts_break_at_old_method_breakpoints = $rOpts->{'break-at-old-method-breakpoints'}; # This array will hold the sequence numbers of the tokens to be welded. my @welds; # Variables needed for estimating line lengths my $maximum_text_length; # maximum spaces available for text my $starting_lentot; # cumulative text to start of current line my $iline_outer_opening = -1; my $weld_count_this_start = 0; my $weld_starts_in_block = 0; # OLD: $single_line_tol added to fix cases b1180 b1181 # = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0; # NEW: $single_line_tol=0 fixes b1212; and b1180-1181 work ok now # =1 for -vmll and -lp; fixes b1452, b1453, b1454 # NOTE: the combination -vmll and -lp can be unstable, especially when # also combined with -wn. It may eventually be necessary to turn off -vmll # if -lp is set. For now, this works. The value '1' is a minimum which # works but can be increased if necessary. my $single_line_tol = $rOpts_variable_maximum_line_length && $rOpts_line_up_parentheses ? 1 : 0; my $multiline_tol = $single_line_tol + 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation ); # Define a welding cutoff level: do not start a weld if the inside # container level equals or exceeds this level. # We use the minimum of two criteria, either of which may be more # restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and # the 'beta' value is more restrictive in other cases (b1243). # Reduced beta term from beta+3 to beta+2 to fix b1401. Previously: # my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2); # This is now '$high_stress_level'. # The vertical tightness flags can throw off line length calculations. # This patch was added to fix instability issue b1284. # It works to always use a tol of 1 for 1 line block length tests, but # this restricted value keeps test case wn6.wn working as before. # It may be necessary to include '[' and '{' here in the future. my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0; # Abbreviations: # _oo=outer opening, i.e. first of { { # _io=inner opening, i.e. second of { { # _oc=outer closing, i.e. second of } { # _ic=inner closing, i.e. first of } } my $previous_pair; # Main loop over nested pairs... # We are working from outermost to innermost pairs so that # level changes will be complete when we arrive at the inner pairs. while ( my $item = pop( @{$rnested_pairs} ) ) { my ( $inner_seqno, $outer_seqno ) = @{$item}; my $Kouter_opening = $K_opening_container->{$outer_seqno}; my $Kinner_opening = $K_opening_container->{$inner_seqno}; my $Kouter_closing = $K_closing_container->{$outer_seqno}; my $Kinner_closing = $K_closing_container->{$inner_seqno}; # RULE: do not weld if inner container has <= 3 tokens unless the next # token is a heredoc (so we know there will be multiple lines) if ( $Kinner_closing - $Kinner_opening <= 4 ) { my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening); next unless defined($Knext_nonblank); my $type = $rLL->[$Knext_nonblank]->[_TYPE_]; next unless ( $type eq 'h' ); } my $outer_opening = $rLL->[$Kouter_opening]; my $inner_opening = $rLL->[$Kinner_opening]; my $outer_closing = $rLL->[$Kouter_closing]; my $inner_closing = $rLL->[$Kinner_closing]; # RULE: do not weld to a hash brace. The reason is that it has a very # strong bond strength to the next token, so a line break after it # may not work. Previously we allowed welding to something like @{ # but that caused blinking states (cases b751, b779). if ( $inner_opening->[_TYPE_] eq 'L' ) { next; } # RULE: do not weld to a square bracket which does not contain commas if ( $inner_opening->[_TYPE_] eq '[' ) { my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno}; next unless ( $rtype_count && $rtype_count->{','} ); # Do not weld if there is text before a '[' such as here: # curr_opt ( @beg [2,5] ) # It will not break into the desired sandwich structure. # This fixes case b109, 110. my $Kdiff = $Kinner_opening - $Kouter_opening; next if ( $Kdiff > 2 ); next if ( $Kdiff == 2 && $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' ); } # RULE: Avoid welding under stress. The idea is that we need to have a # little space* within a welded container to avoid instability. Note # that after each weld the level values are reduced, so long multiple # welds can still be made. This rule will seldom be a limiting factor # in actual working code. Fixes b1206, b1243. my $inner_level = $inner_opening->[_LEVEL_]; if ( $inner_level >= $high_stress_level ) { next } # Set flag saying if this pair starts a new weld my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] ); # Set flag saying if this pair is adjacent to the previous nesting pair # (even if previous pair was rejected as a weld) my $touch_previous_pair = defined($previous_pair) && $outer_seqno == $previous_pair->[0]; $previous_pair = $item; my $do_not_weld_rule = 0; my $Msg = EMPTY_STRING; my $is_one_line_weld; my $iline_oo = $outer_opening->[_LINE_INDEX_]; my $iline_io = $inner_opening->[_LINE_INDEX_]; my $iline_ic = $inner_closing->[_LINE_INDEX_]; my $iline_oc = $outer_closing->[_LINE_INDEX_]; my $token_oo = $outer_opening->[_TOKEN_]; my $token_io = $inner_opening->[_TOKEN_]; # DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom # Added for case b973. Moved here from below to fix b1423. if ( !$do_not_weld_rule && $rOpts_break_at_old_method_breakpoints && $iline_io > $iline_oo ) { foreach my $iline ( $iline_oo + 1 .. $iline_io ) { my $rK_range = $rlines->[$iline]->{_rK_range}; next unless defined($rK_range); my ( $Kfirst, $Klast ) = @{$rK_range}; next unless defined($Kfirst); if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) { $do_not_weld_rule = 7; last; } } } next if ($do_not_weld_rule); # Turn off vertical tightness at possible one-line welds. Fixes b1402, # b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339, # b1340, b1341, b1342, b1343, which previously used a separate fix. # Issue c161 is the latest and simplest check, using # $iline_ic==$iline_io as the test. if ( %opening_vertical_tightness && $iline_ic == $iline_io && $opening_vertical_tightness{$token_oo} ) { $rmax_vertical_tightness->{$outer_seqno} = 0; } my $is_multiline_weld = $iline_oo == $iline_io && $iline_ic == $iline_oc && $iline_io != $iline_ic; if (DEBUG_WELD) { my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_]; my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_]; $Msg .= <<EOM; Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io tokens '$token_oo' .. '$token_io' EOM } # DO-NOT-WELD RULE 0: # Avoid a new paren-paren weld if inner parens are 'sheared' (separated # by one line). This can produce instabilities (fixes b1250 b1251 # 1256). if ( !$is_multiline_weld && $iline_ic == $iline_io + 1 && $token_oo eq '(' && $token_io eq '(' ) { if (DEBUG_WELD) { $Msg .= "RULE 0: Not welding due to sheared inner parens\n"; print {*STDOUT} $Msg; } next; } # If this pair is not adjacent to the previous pair (skipped or not), # then measure lengths from the start of line of oo. if ( !$touch_previous_pair # Also do this if restarting at a new line; fixes case b965, s001 || ( !$weld_count_this_start && $iline_oo > $iline_outer_opening ) ) { # Remember the line we are using as a reference $iline_outer_opening = $iline_oo; $weld_count_this_start = 0; $weld_starts_in_block = 0; ( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg ) = $self->setup_new_weld_measurements( $Kouter_opening, $Kinner_opening ); if ( !$new_weld_ok && ( $iline_oo != $iline_io || $iline_ic != $iline_oc ) ) { if (DEBUG_WELD) { print {*STDOUT} $msg } next; } my $rK_range = $rlines->[$iline_oo]->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; # An existing one-line weld is a line in which # (1) the containers are all on one line, and # (2) the line does not exceed the allowable length if ( $iline_oo == $iline_oc ) { # All the tokens are on one line, now check their length. # Start with the full line index range. We will reduce this # in the coding below in some cases. my $Kstart = $Kfirst; my $Kstop = $Klast; # Note that the following minimal choice for measuring will # work and will not cause any instabilities because it is # invariant: ## my $Kstart = $Kouter_opening; ## my $Kstop = $Kouter_closing; # But that can lead to some undesirable welds. So a little # more complicated method has been developed. # We are trying to avoid creating bad two-line welds when we are # working on long, previously un-welded input text, such as # INPUT (example of a long input line weld candidate): ## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label)); # GOOD two-line break: (not welded; result marked too long): ## $mutation->transpos( ## $self->RNA->position($mutation->label, $atg_label)); # BAD two-line break: (welded; result if we weld): ## $mutation->transpos($self->RNA->position( ## $mutation->label, $atg_label)); # We can only get an approximate estimate of the final length, # since the line breaks may change, and for -lp mode because # even the indentation is not yet known. my $level_first = $rLL->[$Kfirst]->[_LEVEL_]; my $level_last = $rLL->[$Klast]->[_LEVEL_]; my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_]; my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_]; # - measure to the end of the original line if balanced # - measure to the closing container if unbalanced (fixes b1230) #if ( $level_first != $level_last ) { $Kstop = $Kouter_closing } if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing } # - measure from the start of the original line if balanced # - measure from the most previous token with same level # if unbalanced (b1232) if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) { $Kstart = $Kouter_opening; foreach my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) ) { next if ( $rLL->[$KK]->[_TYPE_] eq 'b' ); last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo ); $Kstart = $KK; } } my $excess = $self->excess_line_length_for_Krange( $Kstart, $Kstop ); # Coding simplified here for case b1219. # Increased tol from 0 to 1 when pvt>0 to fix b1284. $is_one_line_weld = $excess <= $one_line_tol; } # DO-NOT-WELD RULE 1: # Do not weld something that looks like the start of a two-line # function call, like this: <<snippets/wn6.in>> # $trans->add_transformation( # PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) ); # We will look for a semicolon after the closing paren. # We want to weld something complex, like this though # my $compass = uc( opposite_direction( line_to_canvas_direction( # @{ $coords[0] }, @{ $coords[1] } ) ) ); # Otherwise we will get a 'blinker'. For example, the following # would become a blinker without this rule: # $Self->_Add( $SortOrderDisplay{ $Field # ->GenerateFieldForSelectSQL() } ); # But it is okay to weld a two-line statement if it looks like # it was already welded, meaning that the two opening containers are # on a different line that the two closing containers. This is # necessary to prevent blinking of something like this with # perltidy -wn -pbp (starting indentation two levels deep): # $top_label->set_text( gettext( # "Unable to create personal directory - check permissions.") ); if ( $iline_oc == $iline_oo + 1 && $iline_io == $iline_ic && $token_oo eq '(' ) { # Look for following semicolon... my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing); my $next_nonblank_type = defined($Knext_nonblank) ? $rLL->[$Knext_nonblank]->[_TYPE_] : 'b'; if ( $next_nonblank_type eq ';' ) { # Then do not weld if no other containers between inner # opening and closing. my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_]; if ( $Knext_seq_item == $Kinner_closing ) { $do_not_weld_rule = 1; } } } } ## end starting new weld sequence else { # set the 1-line flag if continuing a weld sequence; fixes b1239 $is_one_line_weld = ( $iline_oo == $iline_oc ); } # DO-NOT-WELD RULE 2: # Do not weld an opening paren to an inner one line brace block # We will just use old line numbers for this test and require # iterations if necessary for convergence # For example, otherwise we could cause the opening paren # in the following example to separate from the caller name # as here: # $_[0]->code_handler # ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } ); # Here is another example where we do not want to weld: # $wrapped->add_around_modifier( # sub { push @tracelog => 'around 1'; $_[0]->(); } ); # If the one line sub block gets broken due to length or by the # user, then we can weld. The result will then be: # $wrapped->add_around_modifier( sub { # push @tracelog => 'around 1'; # $_[0]->(); # } ); # Updated to fix cases b1082 b1102 b1106 b1115: # Also, do not weld to an intact inner block if the outer opening token # is on a different line. For example, this prevents oscillation # between these two states in case b1106: # return map{ # ($_,[$self->$_(@_[1..$#_])]) # }@every; # return map { ( # $_, [ $self->$_( @_[ 1 .. $#_ ] ) ] # ) } @every; # The effect of this change on typical code is very minimal. Sometimes # it may take a second iteration to converge, but this gives protection # against blinking. if ( !$do_not_weld_rule && !$is_one_line_weld && $iline_ic == $iline_io ) { $do_not_weld_rule = 2 if ( $token_oo eq '(' || $iline_oo != $iline_io ); } # DO-NOT-WELD RULE 2A: # Do not weld an opening asub brace in -lp mode if -asbl is set. This # helps avoid instabilities in one-line block formation, and fixes # b1241. Previously, the '$is_one_line_weld' flag was tested here # instead of -asbl, and this fixed most cases. But it turns out that # the real problem was the -asbl flag, and switching to this was # necessary to fixe b1268. This also fixes b1269, b1277, b1278. if ( !$do_not_weld_rule && $rOpts_line_up_parentheses && $rOpts_asbl && $ris_asub_block->{$outer_seqno} ) { $do_not_weld_rule = '2A'; } # DO-NOT-WELD RULE 3: # Do not weld if this makes our line too long. # Use a tolerance which depends on if the old tokens were welded # (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759) if ( !$do_not_weld_rule ) { # Measure to a little beyond the inner opening token if it is # followed by a bare word, which may have unusual line break rules. # NOTE: Originally this was OLD RULE 6: do not weld to a container # which is followed on the same line by an unknown bareword token. # This can cause blinkers (cases b626, b611). But OK to weld one # line welds to fix cases b1057 b1064. For generality, OLD RULE 6 # has been merged into RULE 3 here to also fix cases b1078 b1091. my $K_for_length = $Kinner_opening; my $Knext_io = $self->K_next_nonblank($Kinner_opening); next unless ( defined($Knext_io) ); # shouldn't happen my $type_io_next = $rLL->[$Knext_io]->[_TYPE_]; # Note: may need to eventually also include other types here, # such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) { if ( $type_io_next eq 'w' ) { my $Knext_io2 = $self->K_next_nonblank($Knext_io); next unless ( defined($Knext_io2) ); my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_]; if ( !$type_ok_after_bareword{$type_io_next2} ) { $K_for_length = $Knext_io2; } } # Use a tolerance for welds over multiple lines to avoid blinkers. # We can use zero tolerance if it looks like we are working on an # existing weld. my $tol = $is_one_line_weld || $is_multiline_weld ? $single_line_tol : $multiline_tol; # By how many characters does this exceed the text window? my $excess = $self->cumulative_length_before_K($K_for_length) - $starting_lentot + 1 + $tol - $maximum_text_length; # Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998 # b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018 # Revised patch: New tolerance definition allows going back to '> 0' # here. This fixes case b1124. See also cases b1087 and b1087a. if ( $excess > 0 ) { $do_not_weld_rule = 3 } if (DEBUG_WELD) { $Msg .= "RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n"; } } # DO-NOT-WELD RULE 4; implemented for git#10: # Do not weld an opening -ce brace if the next container is on a single # line, different from the opening brace. (This is very rare). For # example, given the following with -ce, we will avoid joining the { # and [ # } else { # [ $_, length($_) ] # } # because this would produce a terminal one-line block: # } else { [ $_, length($_) ] } # which may not be what is desired. But given this input: # } else { [ $_, length($_) ] } # then we will do the weld and retain the one-line block if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) { my $block_type = $rblock_type_of_seqno->{$outer_seqno}; if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) { my $io_line = $inner_opening->[_LINE_INDEX_]; my $ic_line = $inner_closing->[_LINE_INDEX_]; my $oo_line = $outer_opening->[_LINE_INDEX_]; if ( $oo_line < $io_line && $ic_line == $io_line ) { $do_not_weld_rule = 4; } } } # DO-NOT-WELD RULE 5: do not include welds excluded by user if ( !$do_not_weld_rule && %weld_nested_exclusion_rules && ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld ) || $self->is_excluded_weld( $Kinner_opening, 0 ) ) ) { $do_not_weld_rule = 5; } # DO-NOT-WELD RULE 6: This has been merged into RULE 3 above. if ($do_not_weld_rule) { # After neglecting a pair, we start measuring from start of point # io ... but not if previous type does not like to be separated # from its container (fixes case b1184) my $Kprev = $self->K_previous_nonblank($Kinner_opening); my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w'; if ( !$has_tight_paren{$type_prev} ) { my $starting_level = $inner_opening->[_LEVEL_]; my $starting_ci_level = $inner_opening->[_CI_LEVEL_]; $starting_lentot = $self->cumulative_length_before_K($Kinner_opening); $maximum_text_length = $maximum_text_length_at_level[$starting_level] - $starting_ci_level * $rOpts_continuation_indentation; } if (DEBUG_WELD) { $Msg .= "Not welding due to RULE $do_not_weld_rule\n"; print {*STDOUT} $Msg; } # Normally, a broken pair should not decrease indentation of # intermediate tokens: ## if ( $last_pair_broken ) { next } # However, for long strings of welded tokens, such as '{{{{{{...' # we will allow broken pairs to also remove indentation. # This will keep very long strings of opening and closing # braces from marching off to the right. We will do this if the # number of tokens in a weld before the broken weld is 4 or more. # This rule will mainly be needed for test scripts, since typical # welds have fewer than about 4 welded tokens. if ( !@welds || @{ $welds[-1] } < 4 ) { next } } # otherwise start new weld ... elsif ($starting_new_weld) { $weld_count_this_start++; if (DEBUG_WELD) { $Msg .= "Starting new weld\n"; print {*STDOUT} $Msg; } push @welds, $item; my $parent_seqno = $self->parent_seqno_by_K($Kouter_closing); $weld_starts_in_block = $parent_seqno == SEQ_ROOT || $rblock_type_of_seqno->{$parent_seqno}; $rK_weld_right->{$Kouter_opening} = $Kinner_opening; $rK_weld_left->{$Kinner_opening} = $Kouter_opening; $rK_weld_right->{$Kinner_closing} = $Kouter_closing; $rK_weld_left->{$Kouter_closing} = $Kinner_closing; } # ... or extend current weld else { $weld_count_this_start++; if (DEBUG_WELD) { $Msg .= "Extending current weld\n"; print {*STDOUT} $Msg; } unshift @{ $welds[-1] }, $inner_seqno; $rK_weld_right->{$Kouter_opening} = $Kinner_opening; $rK_weld_left->{$Kinner_opening} = $Kouter_opening; $rK_weld_right->{$Kinner_closing} = $Kouter_closing; $rK_weld_left->{$Kouter_closing} = $Kinner_closing; # Keep a broken container broken at multiple welds. This might # also be useful for simple welds, but for now it is restricted # to multiple welds to minimize changes to existing coding. This # fixes b1429, b1430. Updated for issue c198: but allow a # line differences of 1 (simple shear) so that a simple shear # can remain or become a single line. if ( $iline_ic - $iline_io > 1 ) { # Only set this break if it is the last possible weld in this # chain. This will keep some extreme test cases unchanged. my $is_chain_end = !@{$rnested_pairs} || $rnested_pairs->[-1]->[1] != $inner_seqno; if ($is_chain_end) { $self->[_rbreak_container_]->{$inner_seqno} = 1; } } } # After welding, reduce the indentation level if all intermediate tokens my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_]; if ( $dlevel != 0 ) { my $Kstart = $Kinner_opening; my $Kstop = $Kinner_closing; foreach my $KK ( $Kstart .. $Kstop ) { $rLL->[$KK]->[_LEVEL_] += $dlevel; } # Copy opening ci level to help break at = for -lp mode (case b1124) $rLL->[$Kinner_opening]->[_CI_LEVEL_] = $rLL->[$Kouter_opening]->[_CI_LEVEL_]; # But only copy the closing ci level if the outer container is # in a block; otherwise poor results can be produced. if ($weld_starts_in_block) { $rLL->[$Kinner_closing]->[_CI_LEVEL_] = $rLL->[$Kouter_closing]->[_CI_LEVEL_]; } } } return; } ## end sub weld_nested_containers sub weld_nested_quotes { # Called once per file for option '--weld-nested-containers'. This # does welding on qw quotes. my $self = shift; # See if quotes are excluded from welding my $rflags = $weld_nested_exclusion_rules{'q'}; return if ( defined($rflags) && defined( $rflags->[1] ) ); my $rK_weld_left = $self->[_rK_weld_left_]; my $rK_weld_right = $self->[_rK_weld_right_]; my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $Num = @{$rLL}; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $rlines = $self->[_rlines_]; my $starting_lentot; my $maximum_text_length; my $is_single_quote = sub { my ( $Kbeg, $Kend, $quote_type ) = @_; foreach my $K ( $Kbeg .. $Kend ) { my $test_type = $rLL->[$K]->[_TYPE_]; next if ( $test_type eq 'b' ); return if ( $test_type ne $quote_type ); } return 1; }; # Length tolerance - same as previously used for sub weld_nested my $multiline_tol = 1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation ); # look for single qw quotes nested in containers my $KNEXT = $self->[_K_first_seq_item_]; while ( defined($KNEXT) ) { my $KK = $KNEXT; $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $rtoken_vars = $rLL->[$KK]; my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; if ( !$outer_seqno ) { next if ( $KK == 0 ); # first token in file may not be container # A fault here implies that an error was made in the little loop at # the bottom of sub 'respace_tokens' which set the values of # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the # loop control lines above. Fault("sequence = $outer_seqno not defined at K=$KK") if (DEVEL_MODE); next; } my $token = $rtoken_vars->[_TOKEN_]; if ( $is_opening_token{$token} ) { # see if the next token is a quote of some type my $Kn = $KK + 1; $Kn += 1 if ( $Kn < $Num && $rLL->[$Kn]->[_TYPE_] eq 'b' ); next if ( $Kn >= $Num ); my $next_token = $rLL->[$Kn]->[_TOKEN_]; my $next_type = $rLL->[$Kn]->[_TYPE_]; next unless ( ( $next_type eq 'q' || $next_type eq 'Q' ) && substr( $next_token, 0, 1 ) eq 'q' ); # The token before the closing container must also be a quote my $Kouter_closing = $K_closing_container->{$outer_seqno}; my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing); next unless $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type; # This is an inner opening container my $Kinner_opening = $Kn; # Do not weld to single-line quotes. Nothing is gained, and it may # look bad. next if ( $Kinner_closing == $Kinner_opening ); # Only weld to quotes delimited with container tokens. This is # because welding to arbitrary quote delimiters can produce code # which is less readable than without welding. my $closing_delimiter = substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 ); next unless ( $is_closing_token{$closing_delimiter} || $closing_delimiter eq '>' ); # Now make sure that there is just a single quote in the container next unless ( $is_single_quote->( $Kinner_opening + 1, $Kinner_closing - 1, $next_type ) ); # OK: This is a candidate for welding my $Msg = EMPTY_STRING; my $do_not_weld; my $Kouter_opening = $K_opening_container->{$outer_seqno}; my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_]; my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_]; my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_]; my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_]; my $is_old_weld = ( $iline_oo == $iline_io && $iline_ic == $iline_oc ); # Fix for case b1189. If quote is marked as type 'Q' then only weld # if the two closing tokens are on the same input line. Otherwise, # the closing line will be output earlier in the pipeline than # other CODE lines and welding will not actually occur. This will # leave a half-welded structure with potential formatting # instability. This might be fixed by adding a check for a weld on # a closing Q token and sending it down the normal channel, but it # would complicate the code and is potentially risky. next if (!$is_old_weld && $next_type eq 'Q' && $iline_ic != $iline_oc ); # If welded, the line must not exceed allowed line length ( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg ) = $self->setup_new_weld_measurements( $Kouter_opening, $Kinner_opening ); if ( !$ok_to_weld ) { if (DEBUG_WELD) { print {*STDOUT} $msg } next; } my $length = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot; my $excess = $length + $multiline_tol - $maximum_text_length; my $excess_max = ( $is_old_weld ? $multiline_tol : 0 ); if ( $excess >= $excess_max ) { $do_not_weld = 1; } if (DEBUG_WELD) { if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING } $Msg .= "excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n"; } # Check weld exclusion rules for outer container if ( !$do_not_weld ) { my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} ); if ( $self->is_excluded_weld( $KK, $is_leading ) ) { if (DEBUG_WELD) { $Msg .= "No qw weld due to weld exclusion rules for outer container\n"; } $do_not_weld = 1; } } # Check the length of the last line (fixes case b1039) if ( !$do_not_weld ) { my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range}; my ( $Kfirst_ic, $Klast_ic ) = @{$rK_range_ic}; my $excess_ic = $self->excess_line_length_for_Krange( $Kfirst_ic, $Kouter_closing ); # Allow extra space for additional welded closing container(s) # and a space and comma or semicolon. # NOTE: weld len has not been computed yet. Use 2 spaces # for now, correct for a single weld. This estimate could # be made more accurate if necessary. my $weld_len = defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0; if ( $excess_ic + $weld_len + 2 > 0 ) { if (DEBUG_WELD) { $Msg .= "No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n"; } $do_not_weld = 1; } } if ($do_not_weld) { if (DEBUG_WELD) { $Msg .= "Not Welding QW\n"; print {*STDOUT} $Msg; } next; } # OK to weld if (DEBUG_WELD) { $Msg .= "Welding QW\n"; print {*STDOUT} $Msg; } $rK_weld_right->{$Kouter_opening} = $Kinner_opening; $rK_weld_left->{$Kinner_opening} = $Kouter_opening; $rK_weld_right->{$Kinner_closing} = $Kouter_closing; $rK_weld_left->{$Kouter_closing} = $Kinner_closing; # Undo one indentation level if an extra level was added to this # multiline quote my $qw_seqno = $self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening}; if ( $qw_seqno && $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} ) { foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) { $rLL->[$K]->[_LEVEL_] -= 1; } $rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0; $rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0; } # undo CI for other welded quotes else { foreach my $K ( $Kinner_opening .. $Kinner_closing ) { $rLL->[$K]->[_CI_LEVEL_] = 0; } } # Change the level of a closing qw token to be that of the outer # containing token. This will allow -lp indentation to function # correctly in the vertical aligner. # Patch to fix c002: but not if it contains text if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) { $rLL->[$Kinner_closing]->[_LEVEL_] = $rLL->[$Kouter_closing]->[_LEVEL_]; } } } return; } ## end sub weld_nested_quotes sub is_welded_at_seqno { my ( $self, $seqno ) = @_; # given a sequence number: # return true if it is welded either left or right # return false otherwise return unless ( $total_weld_count && defined($seqno) ); my $KK_o = $self->[_K_opening_container_]->{$seqno}; return unless defined($KK_o); return defined( $self->[_rK_weld_left_]->{$KK_o} ) || defined( $self->[_rK_weld_right_]->{$KK_o} ); } ## end sub is_welded_at_seqno sub mark_short_nested_blocks { # This routine looks at the entire file and marks any short nested blocks # which should not be broken. The results are stored in the hash # $rshort_nested->{$type_sequence} # which will be true if the container should remain intact. # # For example, consider the following line: # sub cxt_two { sort { $a <=> $b } test_if_list() } # The 'sort' block is short and nested within an outer sub block. # Normally, the existence of the 'sort' block will force the sub block to # break open, but this is not always desirable. Here we will set a flag for # the sort block to prevent this. To give the user control, we will # follow the input file formatting. If either of the blocks is broken in # the input file then we will allow it to remain broken. Otherwise we will # set a flag to keep it together in later formatting steps. # The flag which is set here will be checked in two places: # 'sub process_line_of_CODE' and 'sub starting_one_line_block' my $self = shift; return if $rOpts->{'indent-only'}; my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); return unless ( $rOpts->{'one-line-block-nesting'} ); my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $rbreak_container = $self->[_rbreak_container_]; my $ris_broken_container = $self->[_ris_broken_container_]; my $rshort_nested = $self->[_rshort_nested_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; # Variables needed for estimating line lengths my $maximum_text_length; my $starting_lentot; my $length_tol = 1; my $excess_length_to_K = sub { my ($K) = @_; # Estimate the length from the line start to a given token my $length = $self->cumulative_length_before_K($K) - $starting_lentot; my $excess_length = $length + $length_tol - $maximum_text_length; return ($excess_length); }; # loop over all containers my @open_block_stack; my $iline = -1; my $KNEXT = $self->[_K_first_seq_item_]; while ( defined($KNEXT) ) { my $KK = $KNEXT; $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $rtoken_vars = $rLL->[$KK]; my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; if ( !$type_sequence ) { next if ( $KK == 0 ); # first token in file may not be container # A fault here implies that an error was made in the little loop at # the bottom of sub 'respace_tokens' which set the values of # _KNEXT_SEQ_ITEM_. Or an error has been introduced in the # loop control lines above. Fault("sequence = $type_sequence not defined at K=$KK") if (DEVEL_MODE); next; } # Patch: do not mark short blocks with welds. # In some cases blinkers can form (case b690). if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) { next; } # We are just looking at code blocks my $token = $rtoken_vars->[_TOKEN_]; my $type = $rtoken_vars->[_TYPE_]; next unless ( $type eq $token ); next unless ( $rblock_type_of_seqno->{$type_sequence} ); # Keep a stack of all acceptable block braces seen. # Only consider blocks entirely on one line so dump the stack when line # changes. my $iline_last = $iline; $iline = $rLL->[$KK]->[_LINE_INDEX_]; if ( $iline != $iline_last ) { @open_block_stack = () } if ( $token eq '}' ) { if (@open_block_stack) { pop @open_block_stack } } next unless ( $token eq '{' ); # block must be balanced (bad scripts may be unbalanced) my $K_opening = $K_opening_container->{$type_sequence}; my $K_closing = $K_closing_container->{$type_sequence}; next unless ( defined($K_opening) && defined($K_closing) ); # require that this block be entirely on one line next if ( $ris_broken_container->{$type_sequence} || $rbreak_container->{$type_sequence} ); # See if this block fits on one line of allowed length (which may # be different from the input script) $starting_lentot = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; my $level = $rLL->[$KK]->[_LEVEL_]; my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; $maximum_text_length = $maximum_text_length_at_level[$level] - $ci_level * $rOpts_continuation_indentation; # Dump the stack if block is too long and skip this block if ( $excess_length_to_K->($K_closing) > 0 ) { @open_block_stack = (); next; } # OK, Block passes tests, remember it push @open_block_stack, $type_sequence; # We are only marking nested code blocks, # so check for a previous block on the stack next if ( @open_block_stack <= 1 ); # Looks OK, mark this as a short nested block $rshort_nested->{$type_sequence} = 1; } return; } ## end sub mark_short_nested_blocks sub special_indentation_adjustments { my ($self) = @_; # Called once per file to define the levels to be used for computing # actual indentation. These levels are initialized to be the structural # levels and then are adjusted if necessary for special purposes. # The adjustments are made either by changing _CI_LEVEL_ directly or # by setting modified levels in the array $self->[_radjusted_levels_]. # NOTE: This routine is called after the weld routines, which may have # already adjusted the initial values of _LEVEL_, so we are making # adjustments on top of those levels. It would be nicer to have the # weld routines also use this adjustment, but that gets complicated # when we combine -gnu -wn and also have some welded quotes. my $Klimit = $self->[_Klimit_]; my $rLL = $self->[_rLL_]; my $radjusted_levels = $self->[_radjusted_levels_]; return unless ( defined($Klimit) ); # Initialize the adjusted levels to be the structural levels foreach my $KK ( 0 .. $Klimit ) { $radjusted_levels->[$KK] = $rLL->[$KK]->[_LEVEL_]; } # First set adjusted levels for any non-indenting braces. $self->do_non_indenting_braces(); # Adjust breaks and indentation list containers $self->break_before_list_opening_containers(); # Set adjusted levels for the whitespace cycle option. $self->whitespace_cycle_adjustment(); $self->braces_left_setup(); # Adjust continuation indentation if -bli is set $self->bli_adjustment(); $self->extended_ci() if ($rOpts_extended_continuation_indentation); # Now clip any adjusted levels to be non-negative $self->clip_adjusted_levels(); return; } ## end sub special_indentation_adjustments sub clip_adjusted_levels { # Replace any negative adjusted levels with zero. # Negative levels can occur in files with brace errors. my ($self) = @_; my $radjusted_levels = $self->[_radjusted_levels_]; return unless defined($radjusted_levels) && @{$radjusted_levels}; my $min = min( @{$radjusted_levels} ); # fast check for min if ( $min < 0 ) { # slow loop, but rarely needed foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) } } return; } ## end sub clip_adjusted_levels sub do_non_indenting_braces { # Called once per file to handle the --non-indenting-braces parameter. # Remove indentation within marked braces if requested my ($self) = @_; # Any non-indenting braces have been found by sub find_non_indenting_braces # and are defined by the following hash: my $rseqno_non_indenting_brace_by_ix = $self->[_rseqno_non_indenting_brace_by_ix_]; return unless ( %{$rseqno_non_indenting_brace_by_ix} ); my $rlines = $self->[_rlines_]; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_]; my $radjusted_levels = $self->[_radjusted_levels_]; # First locate all of the marked blocks my @K_stack; foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) { my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix}; my $KK = $K_opening_container->{$seqno}; my $line_of_tokens = $rlines->[$ix]; my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; $rspecial_side_comment_type->{$Klast} = 'NIB'; push @K_stack, [ $KK, 1 ]; my $Kc = $K_closing_container->{$seqno}; push @K_stack, [ $Kc, -1 ] if ( defined($Kc) ); } return unless (@K_stack); @K_stack = sort { $a->[0] <=> $b->[0] } @K_stack; # Then loop to remove indentation within marked blocks my $KK_last = 0; my $ndeep = 0; foreach my $item (@K_stack) { my ( $KK, $inc ) = @{$item}; if ( $ndeep > 0 ) { foreach ( $KK_last + 1 .. $KK ) { $radjusted_levels->[$_] -= $ndeep; } # We just subtracted the old $ndeep value, which only applies to a # '{'. The new $ndeep applies to a '}', so we undo the error. if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 } } $ndeep += $inc; $KK_last = $KK; } return; } ## end sub do_non_indenting_braces sub whitespace_cycle_adjustment { my $self = shift; # Called once per file to implement the --whitespace-cycle option my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $radjusted_levels = $self->[_radjusted_levels_]; my $maximum_level = $self->[_maximum_level_]; if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 && $rOpts_whitespace_cycle < $maximum_level ) { my $Kmax = @{$rLL} - 1; my $whitespace_last_level = -1; my @whitespace_level_stack = (); my $last_nonblank_type = 'b'; my $last_nonblank_token = EMPTY_STRING; foreach my $KK ( 0 .. $Kmax ) { my $level_abs = $radjusted_levels->[$KK]; my $level = $level_abs; if ( $level_abs < $whitespace_last_level ) { pop(@whitespace_level_stack); } if ( !@whitespace_level_stack ) { push @whitespace_level_stack, $level_abs; } else { if ( $level_abs > $whitespace_last_level ) { $level = $whitespace_level_stack[-1] + ( $level_abs - $whitespace_last_level ); if ( # 1 Try to break at a block brace ( $level > $rOpts_whitespace_cycle && $last_nonblank_type eq '{' && $last_nonblank_token eq '{' ) # 2 Then either a brace or bracket || ( $level > $rOpts_whitespace_cycle + 1 && $last_nonblank_token =~ /^[\{\[]$/ ) # 3 Then a paren too || $level > $rOpts_whitespace_cycle + 2 ) { $level = 1; } push @whitespace_level_stack, $level; } } $level = $whitespace_level_stack[-1]; $radjusted_levels->[$KK] = $level; $whitespace_last_level = $level_abs; my $type = $rLL->[$KK]->[_TYPE_]; my $token = $rLL->[$KK]->[_TOKEN_]; if ( $type ne 'b' ) { $last_nonblank_type = $type; $last_nonblank_token = $token; } } } return; } ## end sub whitespace_cycle_adjustment use constant DEBUG_BBX => 0; sub break_before_list_opening_containers { my ($self) = @_; # This routine is called once per batch to implement parameters # --break-before-hash-brace=n and similar -bbx=n flags # and their associated indentation flags: # --break-before-hash-brace-and-indent and similar -bbxi=n # Nothing to do if none of the -bbx=n parameters has been set return unless %break_before_container_types; my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); # Loop over all opening container tokens my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my $ris_broken_container = $self->[_ris_broken_container_]; my $ris_permanently_broken = $self->[_ris_permanently_broken_]; my $rhas_list = $self->[_rhas_list_]; my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_]; my $radjusted_levels = $self->[_radjusted_levels_]; my $rparent_of_seqno = $self->[_rparent_of_seqno_]; my $rlines = $self->[_rlines_]; my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_]; my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; my $rK_weld_right = $self->[_rK_weld_right_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $length_tol = max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns ); if ($rOpts_ignore_old_breakpoints) { # Patch suggested by b1231; the old tol was excessive. ## $length_tol += $rOpts_maximum_line_length; $length_tol *= 2; } my $rbreak_before_container_by_seqno = {}; my $rwant_reduced_ci = {}; foreach my $seqno ( keys %{$K_opening_container} ) { #---------------------------------------------------------------- # Part 1: Examine any -bbx=n flags #---------------------------------------------------------------- next if ( $rblock_type_of_seqno->{$seqno} ); my $KK = $K_opening_container->{$seqno}; # This must be a list or contain a list. # Note1: switched from 'has_broken_list' to 'has_list' to fix b1024. # Note2: 'has_list' holds the depth to the sub-list. We will require # a depth of just 1 my $is_list = $self->is_list_by_seqno($seqno); my $has_list = $rhas_list->{$seqno}; # Fix for b1173: if welded opening container, use flag of innermost # seqno. Otherwise, the restriction $has_list==1 prevents triple and # higher welds from following the -BBX parameters. if ($total_weld_count) { my $KK_test = $rK_weld_right->{$KK}; if ( defined($KK_test) ) { my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_]; $is_list ||= $self->is_list_by_seqno($seqno_inner); $has_list = $rhas_list->{$seqno_inner}; } } next unless ( $is_list || $has_list && $has_list == 1 ); my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno}; # Only for types of container tokens with a non-default break option my $token = $rLL->[$KK]->[_TOKEN_]; my $break_option = $break_before_container_types{$token}; next unless ($break_option); # Do not use -bbx under stress for stability ... fixes b1300 # TODO: review this; do we also need to look at stress_level_lalpha? my $level = $rLL->[$KK]->[_LEVEL_]; if ( $level >= $stress_level_beta ) { DEBUG_BBX && print "BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n"; next; } # Require previous nonblank to be '=' or '=>' my $Kprev = $KK - 1; next if ( $Kprev < 0 ); my $prev_type = $rLL->[$Kprev]->[_TYPE_]; if ( $prev_type eq 'b' ) { $Kprev--; next if ( $Kprev < 0 ); $prev_type = $rLL->[$Kprev]->[_TYPE_]; } next unless ( $is_equal_or_fat_comma{$prev_type} ); my $ci = $rLL->[$KK]->[_CI_LEVEL_]; #-------------------------------------------- # New coding for option 2 (break if complex). #-------------------------------------------- # This new coding uses clues which are invariant under formatting to # decide if a list is complex. For now it is only applied when -lp # and -vmll are used, but eventually it may become the standard method. # Fixes b1274, b1275, and others, including b1099. if ( $break_option == 2 ) { if ( $rOpts_line_up_parentheses || $rOpts_variable_maximum_line_length ) { # Start with the basic definition of a complex list... my $is_complex = $is_list && $has_list; # and it is also complex if the parent is a list if ( !$is_complex ) { my $parent = $rparent_of_seqno->{$seqno}; if ( $self->is_list_by_seqno($parent) ) { $is_complex = 1; } } # finally, we will call it complex if there are inner opening # and closing container tokens, not parens, within the outer # container tokens. if ( !$is_complex ) { my $Kp = $self->K_next_nonblank($KK); my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b'; if ( $is_opening_token{$token_p} && $token_p ne '(' ) { my $Kc = $K_closing_container->{$seqno}; my $Km = $self->K_previous_nonblank($Kc); my $token_m = defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; # ignore any optional ending comma if ( $token_m eq ',' ) { $Km = $self->K_previous_nonblank($Km); $token_m = defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b'; } $is_complex ||= $is_closing_token{$token_m} && $token_m ne ')'; } } # Convert to option 3 (always break) if complex next unless ($is_complex); $break_option = 3; } } # Fix for b1231: the has_list_with_lec does not cover all cases. # A broken container containing a list and with line-ending commas # will stay broken, so can be treated as if it had a list with lec. $has_list_with_lec ||= $has_list && $ris_broken_container->{$seqno} && $rlec_count_by_seqno->{$seqno}; DEBUG_BBX && print {*STDOUT} "BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n"; # -bbx=1 = stable, try to follow input if ( $break_option == 1 ) { my $iline = $rLL->[$KK]->[_LINE_INDEX_]; my $rK_range = $rlines->[$iline]->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; next unless ( $KK == $Kfirst ); } # -bbx=2 => apply this style only for a 'complex' list elsif ( $break_option == 2 ) { # break if this list contains a broken list with line-ending comma my $ok_to_break; my $Msg = EMPTY_STRING; if ($has_list_with_lec) { $ok_to_break = 1; DEBUG_BBX && do { $Msg = "has list with lec;" }; } if ( !$ok_to_break ) { # Turn off -xci if -bbx=2 and this container has a sublist but # not a broken sublist. This avoids creating blinkers. The # problem is that -xci can cause one-line lists to break open, # and thereby creating formatting instability. # This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044 # b1045 b1046 b1047 b1051 b1052 b1061. if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 } my $parent = $rparent_of_seqno->{$seqno}; if ( $self->is_list_by_seqno($parent) ) { DEBUG_BBX && do { $Msg = "parent is list" }; $ok_to_break = 1; } } if ( !$ok_to_break ) { DEBUG_BBX && print {*STDOUT} "Not breaking at seqno=$seqno: $Msg\n"; next; } DEBUG_BBX && print {*STDOUT} "OK to break at seqno=$seqno: $Msg\n"; # Patch: turn off -xci if -bbx=2 and -lp # This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122 $rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses); } # -bbx=3 = always break elsif ( $break_option == 3 ) { # ok to break } # Shouldn't happen! Bad flag, but make behavior same as 3 else { # ok to break } # Set a flag for actual implementation later in # sub insert_breaks_before_list_opening_containers $rbreak_before_container_by_seqno->{$seqno} = 1; DEBUG_BBX && print {*STDOUT} "BBX: ok to break at seqno=$seqno\n"; # -bbxi=0: Nothing more to do if the ci value remains unchanged my $ci_flag = $container_indentation_options{$token}; next unless ($ci_flag); # -bbxi=1: This option removes ci and is handled in # later sub get_final_indentation if ( $ci_flag == 1 ) { $rwant_reduced_ci->{$seqno} = 1; next; } # -bbxi=2: This option changes the level ... # This option can conflict with -xci in some cases. We can turn off # -xci for this container to avoid blinking. For now, only do this if # -vmll is set. ( fixes b1335, b1336 ) if ($rOpts_variable_maximum_line_length) { $rno_xci_by_seqno->{$seqno} = 1; } #---------------------------------------------------------------- # Part 2: Perform tests before committing to changing ci and level #---------------------------------------------------------------- # Before changing the ci level of the opening container, we need # to be sure that the container will be broken in the later stages of # formatting. We have to do this because we are working early in the # formatting pipeline. A problem can occur if we change the ci or # level of the opening token but do not actually break the container # open as expected. In most cases it wouldn't make any difference if # we changed ci or not, but there are some edge cases where this # can cause blinking states, so we need to try to only change ci if # the container will really be broken. # Only consider containers already broken next if ( !$ris_broken_container->{$seqno} ); # Patch to fix issue b1305: the combination of -naws and ci>i appears # to cause an instability. It should almost never occur in practice. next if (!$rOpts_add_whitespace && $rOpts_continuation_indentation > $rOpts_indent_columns ); # Always ok to change ci for permanently broken containers if ( $ris_permanently_broken->{$seqno} ) { } # Always OK if this list contains a broken sub-container with # a non-terminal line-ending comma elsif ($has_list_with_lec) { } # Otherwise, we are considering a single container... else { # A single container must have at least 1 line-ending comma: next unless ( $rlec_count_by_seqno->{$seqno} ); my $OK; # Since it has a line-ending comma, it will stay broken if the # -boc flag is set if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 } # OK if the container contains multiple fat commas # Better: multiple lines with fat commas if ( !$OK && !$rOpts_ignore_old_breakpoints ) { my $rtype_count = $rtype_count_by_seqno->{$seqno}; next unless ($rtype_count); my $fat_comma_count = $rtype_count->{'=>'}; DEBUG_BBX && print {*STDOUT} "BBX: fat comma count=$fat_comma_count\n"; if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 } } # The last check we can make is to see if this container could # fit on a single line. Use the least possible indentation # estimate, ci=0, so we are not subtracting $ci * # $rOpts_continuation_indentation from tabulated # $maximum_text_length value. if ( !$OK ) { my $maximum_text_length = $maximum_text_length_at_level[$level]; my $K_closing = $K_closing_container->{$seqno}; my $length = $self->cumulative_length_before_K($K_closing) - $self->cumulative_length_before_K($KK); my $excess_length = $length - $maximum_text_length; DEBUG_BBX && print {*STDOUT} "BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n"; # OK if the net container definitely breaks on length if ( $excess_length > $length_tol ) { $OK = 1; DEBUG_BBX && print {*STDOUT} "BBX: excess_length=$excess_length\n"; } # Otherwise skip it else { next } } } #------------------------------------------------------------ # Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag #------------------------------------------------------------ DEBUG_BBX && print {*STDOUT} "BBX: OK to break\n"; # -bbhbi=n # -bbsbi=n # -bbpi=n # where: # n=0 default indentation (usually one ci) # n=1 outdent one ci # n=2 indent one level (minus one ci) # n=3 indent one extra ci [This may be dropped] # NOTE: We are adjusting indentation of the opening container. The # closing container will normally follow the indentation of the opening # container automatically, so this is not currently done. next unless ($ci); # option 1: outdent if ( $ci_flag == 1 ) { $ci -= 1; } # option 2: indent one level elsif ( $ci_flag == 2 ) { $ci -= 1; $radjusted_levels->[$KK] += 1; } # unknown option else { # Shouldn't happen - leave ci unchanged } $rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 ); } $self->[_rbreak_before_container_by_seqno_] = $rbreak_before_container_by_seqno; $self->[_rwant_reduced_ci_] = $rwant_reduced_ci; return; } ## end sub break_before_list_opening_containers use constant DEBUG_XCI => 0; sub extended_ci { # This routine implements the -xci (--extended-continuation-indentation) # flag. We add CI to interior tokens of a container which itself has CI but # only if a token does not already have CI. # To do this, we will locate opening tokens which themselves have # continuation indentation (CI). We track them with their sequence # numbers. These sequence numbers are called 'controlling sequence # numbers'. They apply continuation indentation to the tokens that they # contain. These inner tokens remember their controlling sequence numbers. # Later, when these inner tokens are output, they have to see if the output # lines with their controlling tokens were output with CI or not. If not, # then they must remove their CI too. # The controlling CI concept works hierarchically. But CI itself is not # hierarchical; it is either on or off. There are some rare instances where # it would be best to have hierarchical CI too, but not enough to be worth # the programming effort. # The operations to remove unwanted CI are done in sub 'undo_ci'. my ($self) = @_; my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_]; my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_]; my $ris_bli_container = $self->[_ris_bli_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my %available_space; # Loop over all opening container tokens my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; my @seqno_stack; my $seqno_top; my $KLAST; my $KNEXT = $self->[_K_first_seq_item_]; # The following variable can be used to allow a little extra space to # avoid blinkers. A value $len_tol = 20 fixed the following # fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031. # It turned out that the real problem was mis-parsing a list brace as # a code block in a 'use' statement when the line length was extremely # small. A value of 0 works now, but a slightly larger value can # be used to minimize the chance of a blinker. my $len_tol = 0; while ( defined($KNEXT) ) { # Fix all tokens up to the next sequence item if we are changing CI if ($seqno_top) { my $is_list = $ris_list_by_seqno->{$seqno_top}; my $space = $available_space{$seqno_top}; my $count = 0; foreach my $Kt ( $KLAST + 1 .. $KNEXT - 1 ) { next if ( $rLL->[$Kt]->[_CI_LEVEL_] ); # But do not include tokens which might exceed the line length # and are not in a list. # ... This fixes case b1031 if ( $is_list || $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space || $rLL->[$Kt]->[_TYPE_] eq '#' ) { $rLL->[$Kt]->[_CI_LEVEL_] = 1; $rseqno_controlling_my_ci->{$Kt} = $seqno_top; $count++; } } $ris_seqno_controlling_ci->{$seqno_top} += $count; } $KLAST = $KNEXT; my $KK = $KNEXT; $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_]; my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; # see if we have reached the end of the current controlling container if ( $seqno_top && $seqno == $seqno_top ) { $seqno_top = pop @seqno_stack; } # Patch to fix some block types... # Certain block types arrive from the tokenizer without CI but should # have it for this option. These include anonymous subs and # do sort map grep eval my $block_type = $rblock_type_of_seqno->{$seqno}; if ( $block_type && $is_block_with_ci{$block_type} ) { $rLL->[$KK]->[_CI_LEVEL_] = 1; if ($seqno_top) { $rseqno_controlling_my_ci->{$KK} = $seqno_top; $ris_seqno_controlling_ci->{$seqno_top}++; } } # If this does not have ci, update ci if necessary and continue looking else { if ( !$rLL->[$KK]->[_CI_LEVEL_] ) { if ($seqno_top) { $rLL->[$KK]->[_CI_LEVEL_] = 1; $rseqno_controlling_my_ci->{$KK} = $seqno_top; $ris_seqno_controlling_ci->{$seqno_top}++; } next; } } # We are looking for opening container tokens with ci my $K_opening = $K_opening_container->{$seqno}; next unless ( defined($K_opening) && $KK == $K_opening ); # Make sure there is a corresponding closing container # (could be missing if the script has a brace error) my $K_closing = $K_closing_container->{$seqno}; next unless defined($K_closing); # Skip if requested by -bbx to avoid blinkers next if ( $rno_xci_by_seqno->{$seqno} ); # Skip if this is a -bli container (this fixes case b1065) Note: case # b1065 is also fixed by the update for b1055, so this update is not # essential now. But there does not seem to be a good reason to add # xci and bli together, so the update is retained. next if ( $ris_bli_container->{$seqno} ); # Require different input lines. This will filter out a large number # of small hash braces and array brackets. If we accidentally filter # out an important container, it will get fixed on the next pass. if ( $rLL->[$K_opening]->[_LINE_INDEX_] == $rLL->[$K_closing]->[_LINE_INDEX_] && ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - $rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] > $rOpts_maximum_line_length ) ) { DEBUG_XCI && print "XCI: Skipping seqno=$seqno, require different lines\n"; next; } # Do not apply -xci if adding extra ci will put the container contents # beyond the line length limit (fixes cases b899 b935) my $level = $rLL->[$K_opening]->[_LEVEL_]; my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_]; my $maximum_text_length = $maximum_text_length_at_level[$level] - $ci_level * $rOpts_continuation_indentation; # Fix for b1197 b1198 b1199 b1200 b1201 b1202 # Do not apply -xci if we are running out of space # TODO: review this; do we also need to look at stress_level_alpha? if ( $level >= $stress_level_beta ) { DEBUG_XCI && print "XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n"; next; } # remember how much space is available for patch b1031 above my $space = $maximum_text_length - $len_tol - $rOpts_continuation_indentation; if ( $space < 0 ) { DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n"; next; } DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n"; $available_space{$seqno} = $space; # This becomes the next controlling container push @seqno_stack, $seqno_top if ($seqno_top); $seqno_top = $seqno; } return; } ## end sub extended_ci sub braces_left_setup { # Called once per file to mark all -bl, -sbl, and -asbl containers my $self = shift; my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'}; my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'}; my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl ); my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); # We will turn on this hash for braces controlled by these flags: my $rbrace_left = $self->[_rbrace_left_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $ris_asub_block = $self->[_ris_asub_block_]; my $ris_sub_block = $self->[_ris_sub_block_]; foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { my $block_type = $rblock_type_of_seqno->{$seqno}; # use -asbl flag for an anonymous sub block if ( $ris_asub_block->{$seqno} ) { if ($rOpts_asbl) { $rbrace_left->{$seqno} = 1; } } # use -sbl flag for a named sub elsif ( $ris_sub_block->{$seqno} ) { if ($rOpts_sbl) { $rbrace_left->{$seqno} = 1; } } # use -bl flag if not a sub block of any type else { if ( $rOpts_bl && $block_type =~ /$bl_pattern/ && $block_type !~ /$bl_exclusion_pattern/ ) { $rbrace_left->{$seqno} = 1; } } } return; } ## end sub braces_left_setup sub bli_adjustment { # Called once per file to implement the --brace-left-and-indent option. # If -bli is set, adds one continuation indentation for certain braces my $self = shift; return unless ( $rOpts->{'brace-left-and-indent'} ); my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $ris_bli_container = $self->[_ris_bli_container_]; my $rbrace_left = $self->[_rbrace_left_]; my $K_opening_container = $self->[_K_opening_container_]; my $K_closing_container = $self->[_K_closing_container_]; foreach my $seqno ( keys %{$rblock_type_of_seqno} ) { my $block_type = $rblock_type_of_seqno->{$seqno}; if ( $block_type && $block_type =~ /$bli_pattern/ && $block_type !~ /$bli_exclusion_pattern/ ) { $ris_bli_container->{$seqno} = 1; $rbrace_left->{$seqno} = 1; my $Ko = $K_opening_container->{$seqno}; my $Kc = $K_closing_container->{$seqno}; if ( defined($Ko) && defined($Kc) ) { $rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_]; } } } return; } ## end sub bli_adjustment sub find_multiline_qw { my ( $self, $rqw_lines ) = @_; # Multiline qw quotes are not sequenced items like containers { [ ( # but behave in some respects in a similar way. So this routine finds them # and creates a separate sequence number system for later use. # This is straightforward because they always begin at the end of one line # and end at the beginning of a later line. This is true no matter how we # finally make our line breaks, so we can find them before deciding on new # line breaks. # Input parameter: # if $rqw_lines is defined it is a ref to array of all line index numbers # for which there is a type 'q' qw quote at either end of the line. This # was defined by sub resync_lines_and_tokens for efficiency. # my $rlines = $self->[_rlines_]; # if $rqw_lines is not defined (this will occur with -io option) then we # will have to scan all lines. if ( !defined($rqw_lines) ) { $rqw_lines = [ 0 .. @{$rlines} - 1 ]; } # if $rqw_lines is defined but empty, just return because there are no # multiline qw's else { if ( !@{$rqw_lines} ) { return } } my $rstarting_multiline_qw_seqno_by_K = {}; my $rending_multiline_qw_seqno_by_K = {}; my $rKrange_multiline_qw_by_seqno = {}; my $rmultiline_qw_has_extra_level = {}; my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; my $rLL = $self->[_rLL_]; my $qw_seqno; my $num_qw_seqno = 0; my $K_start_multiline_qw; # For reference, here is the old loop, before $rqw_lines became available: ## foreach my $line_of_tokens ( @{$rlines} ) { foreach my $iline ( @{$rqw_lines} ) { my $line_of_tokens = $rlines->[$iline]; # Note that these first checks are required in case we have to scan # all lines, not just lines with type 'q' at the ends. my $line_type = $line_of_tokens->{_line_type}; next unless ( $line_type eq 'CODE' ); my $rK_range = $line_of_tokens->{_rK_range}; my ( $Kfirst, $Klast ) = @{$rK_range}; next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line # Continuing a sequence of qw lines ... if ( defined($K_start_multiline_qw) ) { my $type = $rLL->[$Kfirst]->[_TYPE_]; # shouldn't happen if ( $type ne 'q' ) { DEVEL_MODE && print {*STDERR} <<EOM; STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n"; EOM $K_start_multiline_qw = undef; next; } my $Kprev = $self->K_previous_nonblank($Kfirst); my $Knext = $self->K_next_nonblank($Kfirst); my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; if ( $type_m eq 'q' && $type_p ne 'q' ) { $rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno; $rKrange_multiline_qw_by_seqno->{$qw_seqno} = [ $K_start_multiline_qw, $Kfirst ]; $K_start_multiline_qw = undef; $qw_seqno = undef; } } # Starting a new a sequence of qw lines ? if ( !defined($K_start_multiline_qw) && $rLL->[$Klast]->[_TYPE_] eq 'q' ) { my $Kprev = $self->K_previous_nonblank($Klast); my $Knext = $self->K_next_nonblank($Klast); my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b'; my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b'; if ( $type_m ne 'q' && $type_p eq 'q' ) { $num_qw_seqno++; $qw_seqno = 'q' . $num_qw_seqno; $K_start_multiline_qw = $Klast; $rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno; } } } # Give multiline qw lists extra indentation instead of CI. This option # works well but is currently only activated when the -xci flag is set. # The reason is to avoid unexpected changes in formatting. if ($rOpts_extended_continuation_indentation) { while ( my ( $qw_seqno_x, $rKrange ) = each %{$rKrange_multiline_qw_by_seqno} ) { my ( $Kbeg, $Kend ) = @{$rKrange}; # require isolated closing token my $token_end = $rLL->[$Kend]->[_TOKEN_]; next unless ( length($token_end) == 1 && ( $is_closing_token{$token_end} || $token_end eq '>' ) ); # require isolated opening token my $token_beg = $rLL->[$Kbeg]->[_TOKEN_]; # allow space(s) after the qw if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ ) { $token_beg =~ s/\s+//; } next unless ( length($token_beg) == 3 ); foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) { $rLL->[$KK]->[_LEVEL_]++; $rLL->[$KK]->[_CI_LEVEL_] = 0; } # set flag for -wn option, which will remove the level $rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1; } } # For the -lp option we need to mark all parent containers of # multiline quotes if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) { while ( my ( $qw_seqno_x, $rKrange ) = each %{$rKrange_multiline_qw_by_seqno} ) { my ( $Kbeg, $Kend ) = @{$rKrange}; my $parent_seqno = $self->parent_seqno_by_K($Kend); next unless ($parent_seqno); # If the parent container exactly surrounds this qw, then -lp # formatting seems to work so we will not mark it. my $is_tightly_contained; my $Kn = $self->K_next_nonblank($Kend); my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef; if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) { my $Kp = $self->K_previous_nonblank($Kbeg); my $seqno_p = defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef; if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) { $is_tightly_contained = 1; } } $ris_excluded_lp_container->{$parent_seqno} = 1 unless ($is_tightly_contained); # continue up the tree marking parent containers while (1) { $parent_seqno = $self->[_rparent_of_seqno_]->{$parent_seqno}; last if ( !defined($parent_seqno) ); last if ( $parent_seqno eq SEQ_ROOT ); $ris_excluded_lp_container->{$parent_seqno} = 1; } } } $self->[_rstarting_multiline_qw_seqno_by_K_] = $rstarting_multiline_qw_seqno_by_K; $self->[_rending_multiline_qw_seqno_by_K_] = $rending_multiline_qw_seqno_by_K; $self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno; $self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level; return; } ## end sub find_multiline_qw use constant DEBUG_COLLAPSED_LENGTHS => 0; # Minimum space reserved for contents of a code block. A value of 40 has given # reasonable results. With a large line length, say -l=120, this will not # normally be noticeable but it will prevent making a mess in some edge cases. use constant MIN_BLOCK_LEN => 40; my %is_handle_type; BEGIN { my @q = qw( w C U G i k => ); @is_handle_type{@q} = (1) x scalar(@q); my $i = 0; use constant { _max_prong_len_ => $i++, _handle_len_ => $i++, _seqno_o_ => $i++, _iline_o_ => $i++, _K_o_ => $i++, _K_c_ => $i++, _interrupted_list_rule_ => $i++, }; } ## end BEGIN sub is_fragile_block_type { my ( $self, $block_type, $seqno ) = @_; # Given: # $block_type = the block type of a token, and # $seqno = its sequence number # Return: # true if this block type stays broken after being broken, # false otherwise # This sub has been added to isolate a tricky decision needed # to fix issue b1428. # The coding here needs to agree with: # - sub process_line where variable '$rbrace_follower' is set # - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set, if ( $is_sort_map_grep_eval{$block_type} || $block_type eq 't' || $self->[_rshort_nested_]->{$seqno} ) { return 0; } return 1; } ## end sub is_fragile_block_type { ## closure xlp_collapsed_lengths my $max_prong_len; my $len; my $last_nonblank_type; my @stack; sub xlp_collapsed_lengths_initialize { $max_prong_len = 0; $len = 0; $last_nonblank_type = 'b'; @stack = (); push @stack, [ 0, # $max_prong_len, 0, # $handle_len, SEQ_ROOT, # $seqno, undef, # $iline, undef, # $KK, undef, # $K_c, undef, # $interrupted_list_rule ]; return; } ## end sub xlp_collapsed_lengths_initialize sub cumulative_length_to_comma { my ( $self, $KK, $K_comma, $K_closing ) = @_; # Given: # $KK = index of starting token, or blank before start # $K_comma = index of line-ending comma # $K_closing = index of the container closing token # Return: # $length = cumulative length of the term my $rLL = $self->[_rLL_]; if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ } my $length = 0; if ( $KK < $K_comma && $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true # Ignore if terminal comma, causes instability (b1297, # b1330) && ( $K_closing - $K_comma > 2 || ( $K_closing - $K_comma == 2 && $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' ) ) # The comma should be in this container && ( $rLL->[$K_comma]->[_LEVEL_] - 1 == $rLL->[$K_closing]->[_LEVEL_] ) ) { # An additional check: if line ends in ), and the ) has vtc then # skip this estimate. Otherwise, vtc can give oscillating results. # Fixes b1448. For example, this could be unstable: # ( $os ne 'win' ? ( -selectcolor => "red" ) : () ), # | |^--K_comma # | ^-- K_prev # ^--- KK # An alternative, possibly better strategy would be to try to turn # off -vtc locally, but it turns out to be difficult to locate the # appropriate closing token when it is not on the same line as its # opening token. my $K_prev = $self->K_previous_nonblank($K_comma); if ( defined($K_prev) && $K_prev >= $KK && $rLL->[$K_prev]->[_TYPE_SEQUENCE_] ) { my $token = $rLL->[$K_prev]->[_TOKEN_]; my $type = $rLL->[$K_prev]->[_TYPE_]; if ( $closing_vertical_tightness{$token} && $type ne 'R' ) { ## type 'R' does not normally get broken, so ignore ## skip length calculation return 0; } } my $starting_len = $KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0; $length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len; } return $length; } ## end sub cumulative_length_to_comma sub xlp_collapsed_lengths { my $self = shift; #---------------------------------------------------------------- # Define the collapsed lengths of containers for -xlp indentation #---------------------------------------------------------------- # We need an estimate of the minimum required line length starting at # any opening container for the -xlp style. This is needed to avoid # using too much indentation space for lower level containers and # thereby running out of space for outer container tokens due to the # maximum line length limit. # The basic idea is that at each node in the tree we imagine that we # have a fork with a handle and collapsible prongs: # # |------------ # |-------- # ------------|------- # handle |------------ # |-------- # prongs # # Each prong has a minimum collapsed length. The collapsed length at a # node is the maximum of these minimum lengths, plus the handle length. # Each of the prongs may itself be a tree node. # This is just a rough calculation to get an approximate starting point # for indentation. Later routines will be more precise. It is # important that these estimates be independent of the line breaks of # the input stream in order to avoid instabilities. my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; my $K_start_multiline_qw; my $level_start_multiline_qw = 0; xlp_collapsed_lengths_initialize(); #-------------------------------- # Loop over all lines in the file #-------------------------------- my $iline = -1; my $skip_next_line; foreach my $line_of_tokens ( @{$rlines} ) { $iline++; if ($skip_next_line) { $skip_next_line = 0; next; } my $line_type = $line_of_tokens->{_line_type}; next if ( $line_type ne 'CODE' ); my $CODE_type = $line_of_tokens->{_code_type}; # Always skip blank lines next if ( $CODE_type eq 'BL' ); # Note on other line types: # 'FS' (Format Skipping) lines may contain opening/closing tokens so # we have to process them to keep the stack correctly sequenced # 'VB' (Verbatim) lines could be skipped, but testing shows that # results look better if we include their lengths. # Also note that we could exclude -xlp formatting of containers with # 'FS' and 'VB' lines, but in testing that was not really beneficial # So we process tokens in 'FS' and 'VB' lines like all the rest... my $rK_range = $line_of_tokens->{_rK_range}; my ( $K_first, $K_last ) = @{$rK_range}; next unless ( defined($K_first) && defined($K_last) ); my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#'; # Always ignore block comments next if ( $has_comment && $K_first == $K_last ); # Handle an intermediate line of a multiline qw quote. These may # require including some -ci or -i spaces. See cases c098/x063. # Updated to check all lines (not just $K_first==$K_last) to fix # b1316 my $K_begin_loop = $K_first; if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) { my $KK = $K_first; my $level = $rLL->[$KK]->[_LEVEL_]; my $ci_level = $rLL->[$KK]->[_CI_LEVEL_]; # remember the level of the start if ( !defined($K_start_multiline_qw) ) { $K_start_multiline_qw = $K_first; $level_start_multiline_qw = $level; my $seqno_qw = $self->[_rstarting_multiline_qw_seqno_by_K_] ->{$K_start_multiline_qw}; if ( !$seqno_qw ) { my $Kp = $self->K_previous_nonblank($K_first); if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) { $K_start_multiline_qw = $Kp; $level_start_multiline_qw = $rLL->[$K_start_multiline_qw]->[_LEVEL_]; } else { # Fix for b1319, b1320 $K_start_multiline_qw = undef; } } } if ( defined($K_start_multiline_qw) ) { $len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] - $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; # We may have to add the spaces of one level or ci level # ... it depends depends on the -xci flag, the -wn flag, # and if the qw uses a container token as the quote # delimiter. # First rule: add ci if there is a $ci_level if ($ci_level) { $len += $rOpts_continuation_indentation; } # Second rule: otherwise, look for an extra indentation # level from the start and add one indentation level if # found. else { if ( $level > $level_start_multiline_qw ) { $len += $rOpts_indent_columns; } } if ( $len > $max_prong_len ) { $max_prong_len = $len } $last_nonblank_type = 'q'; $K_begin_loop = $K_first + 1; # We can skip to the next line if more tokens next if ( $K_begin_loop > $K_last ); } } $K_start_multiline_qw = undef; # Find the terminal token, before any side comment my $K_terminal = $K_last; if ($has_comment) { $K_terminal -= 1; $K_terminal -= 1 if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b' && $K_terminal > $K_first ); } # Use length to terminal comma if interrupted list rule applies if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) { my $K_c = $stack[-1]->[_K_c_]; if ( defined($K_c) ) { #---------------------------------------------------------- # BEGIN patch for issue b1408: If this line ends in an # opening token, look for the closing token and comma at # the end of the next line. If so, combine the two lines to # get the correct sums. This problem seems to require -xlp # -vtc=2 and blank lines to occur. Use %is_opening_type to # fix b1431. #---------------------------------------------------------- if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] } && !$has_comment ) { my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_]; my $Kc_test = $rLL->[$K_terminal]->[_KNEXT_SEQ_ITEM_]; # We are looking for a short broken remnant on the next # line; something like the third line here (b1408): # parent => # Moose::Util::TypeConstraints::find_type_constraint( # 'RefXX' ), # or this # # Help::WorkSubmitter->_filter_chores_and_maybe_warn_user( # $story_set_all_chores), # or this (b1431): # $issue->{ # 'borrowernumber'}, # borrowernumber if ( defined($Kc_test) && $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_] && $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 ) { my $line_of_tokens_next = $rlines->[ $iline + 1 ]; my $rtype_count = $rtype_count_by_seqno->{$seqno_end}; my ( $K_first_next, $K_terminal_next ) = @{ $line_of_tokens_next->{_rK_range} }; # backup at a side comment if ( defined($K_terminal_next) && $rLL->[$K_terminal_next]->[_TYPE_] eq '#' ) { my $Kprev = $self->K_previous_nonblank($K_terminal_next); if ( defined($Kprev) && $Kprev >= $K_first_next ) { $K_terminal_next = $Kprev; } } if ( defined($K_terminal_next) # next line ends with a comma && $rLL->[$K_terminal_next]->[_TYPE_] eq ',' # which follows the closing container token && ( $K_terminal_next - $Kc_test == 1 || ( $K_terminal_next - $Kc_test == 2 && $rLL->[ $K_terminal_next - 1 ] ->[_TYPE_] eq 'b' ) ) # no commas in the container && ( !defined($rtype_count) || !$rtype_count->{','} ) # for now, restrict this to a container with # just 1 or two tokens && $K_terminal_next - $K_terminal <= 5 ) { # combine the next line with the current line $K_terminal = $K_terminal_next; $skip_next_line = 1; if (DEBUG_COLLAPSED_LENGTHS) { print "Combining lines at line $iline\n"; } } } } #-------------------------- # END patch for issue b1408 #-------------------------- if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) { my $length = $self->cumulative_length_to_comma( $K_first, $K_terminal, $K_c ); # Fix for b1331: at a broken => item, include the # length of the previous half of the item plus one for # the missing space if ( $last_nonblank_type eq '=>' ) { $length += $len + 1; } if ( $length > $max_prong_len ) { $max_prong_len = $length; } } } } #---------------------------------- # Loop over all tokens on this line #---------------------------------- $self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop, $K_terminal, $K_last ); # Now take care of any side comment; if ($has_comment) { if ($rOpts_ignore_side_comment_lengths) { $len = 0; } else { # For a side comment when -iscl is not set, measure length from # the start of the previous nonblank token my $len0 = $K_terminal > 0 ? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_] : 0; $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0; if ( $len > $max_prong_len ) { $max_prong_len = $len } } } } ## end loop over lines if (DEBUG_COLLAPSED_LENGTHS) { print "\nCollapsed lengths--\n"; foreach my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} ) { my $clen = $rcollapsed_length_by_seqno->{$key}; print "$key -> $clen\n"; } } return; } ## end sub xlp_collapsed_lengths sub xlp_collapse_lengths_inner_loop { my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_; my $rLL = $self->[_rLL_]; my $K_closing_container = $self->[_K_closing_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_]; my $ris_permanently_broken = $self->[_ris_permanently_broken_]; my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; my $rhas_broken_list = $self->[_rhas_broken_list_]; my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_]; #---------------------------------- # Loop over tokens on this line ... #---------------------------------- foreach my $KK ( $K_begin_loop .. $K_terminal ) { my $type = $rLL->[$KK]->[_TYPE_]; next if ( $type eq 'b' ); #------------------------ # Handle sequenced tokens #------------------------ my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; if ($seqno) { my $token = $rLL->[$KK]->[_TOKEN_]; #---------------------------- # Entering a new container... #---------------------------- if ( $is_opening_token{$token} && defined( $K_closing_container->{$seqno} ) ) { # save current prong length $stack[-1]->[_max_prong_len_] = $max_prong_len; $max_prong_len = 0; # Start new prong one level deeper my $handle_len = 0; if ( $rblock_type_of_seqno->{$seqno} ) { # code blocks do not use -lp indentation, but behave as # if they had a handle of one indentation length $handle_len = $rOpts_indent_columns; } else { if ( $is_handle_type{$last_nonblank_type} ) { $handle_len = $len; $handle_len += 1 if ( $KK > 0 && $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' ); } } # Set a flag if the 'Interrupted List Rule' will be applied # (see sub copy_old_breakpoints). # - Added check on has_broken_list to fix issue b1298 my $interrupted_list_rule = $ris_permanently_broken->{$seqno} && $ris_list_by_seqno->{$seqno} && !$rhas_broken_list->{$seqno} && !$rOpts_ignore_old_breakpoints; # NOTES: Since we are looking at old line numbers we have # to be very careful not to introduce an instability. # This following causes instability (b1288-b1296): # $interrupted_list_rule ||= # $rOpts_break_at_old_comma_breakpoints; # - We could turn off the interrupted list rule if there is # a broken sublist, to follow 'Compound List Rule 1'. # - We could use the _rhas_broken_list_ flag for this. # - But it seems safer not to do this, to avoid # instability, since the broken sublist could be # temporary. It seems better to let the formatting # stabilize by itself after one or two iterations. # - So, not doing this for now # Turn off the interrupted list rule if -vmll is set and a # list has '=>' characters. This avoids instabilities due # to dependence on old line breaks; issue b1325. if ( $interrupted_list_rule && $rOpts_variable_maximum_line_length ) { my $rtype_count = $rtype_count_by_seqno->{$seqno}; if ( $rtype_count && $rtype_count->{'=>'} ) { $interrupted_list_rule = 0; } } my $K_c = $K_closing_container->{$seqno}; # Add length of any terminal list item if interrupted # so that the result is the same as if the term is # in the next line (b1446). if ( $interrupted_list_rule && $KK < $K_terminal # The line should end in a comma # NOTE: this currently assumes break after comma. # As long as the other call to cumulative_length.. # makes the same assumption we should remain stable. && $rLL->[$K_terminal]->[_TYPE_] eq ',' ) { $max_prong_len = $self->cumulative_length_to_comma( $KK + 1, $K_terminal, $K_c ); } push @stack, [ $max_prong_len, $handle_len, $seqno, $iline, $KK, $K_c, $interrupted_list_rule ]; } #-------------------- # Exiting a container #-------------------- elsif ( $is_closing_token{$token} && @stack ) { # The current prong ends - get its handle my $item = pop @stack; my $handle_len = $item->[_handle_len_]; my $seqno_o = $item->[_seqno_o_]; my $iline_o = $item->[_iline_o_]; my $K_o = $item->[_K_o_]; my $K_c_expect = $item->[_K_c_]; my $collapsed_len = $max_prong_len; if ( $seqno_o ne $seqno ) { # This can happen if input file has brace errors. # Otherwise it shouldn't happen. Not fatal but -lp # formatting could get messed up. if ( DEVEL_MODE && !get_saw_brace_error() ) { Fault(<<EOM); sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect EOM } } #------------------------------------------ # Rules to avoid scrunching code blocks ... #------------------------------------------ # Some test cases: # c098/x107 x108 x110 x112 x114 x115 x117 x118 x119 my $block_type = $rblock_type_of_seqno->{$seqno}; if ($block_type) { my $K_c = $KK; my $block_length = MIN_BLOCK_LEN; my $is_one_line_block; my $level = $rLL->[$K_o]->[_LEVEL_]; if ( defined($K_o) && defined($K_c) ) { # note: fixed 3 May 2022 (removed 'my') $block_length = $rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] - $rLL->[$K_o]->[_CUMULATIVE_LENGTH_]; $is_one_line_block = $iline == $iline_o; } # Code block rule 1: Use the total block length if # it is less than the minimum. if ( $block_length < MIN_BLOCK_LEN ) { $collapsed_len = $block_length; } # Code block rule 2: Use the full length of a # one-line block to avoid breaking it, unless # extremely long. We do not need to do a precise # check here, because if it breaks then it will # stay broken on later iterations. elsif ( $is_one_line_block && $block_length < $maximum_line_length_at_level[$level] # But skip this for blocks types which can reform, # like sort/map/grep/eval blocks, to avoid # instability (b1345, b1428) && $self->is_fragile_block_type( $block_type, $seqno ) ) { $collapsed_len = $block_length; } # Code block rule 3: Otherwise the length should be # at least MIN_BLOCK_LEN to avoid scrunching code # blocks. elsif ( $collapsed_len < MIN_BLOCK_LEN ) { $collapsed_len = MIN_BLOCK_LEN; } else { ## ok } } # Store the result. Some extra space, '2', allows for # length of an opening token, inside space, comma, ... # This constant has been tuned to give good overall # results. $collapsed_len += 2; $rcollapsed_length_by_seqno->{$seqno} = $collapsed_len; # Restart scanning the lower level prong if (@stack) { $max_prong_len = $stack[-1]->[_max_prong_len_]; $collapsed_len += $handle_len; if ( $collapsed_len > $max_prong_len ) { $max_prong_len = $collapsed_len; } } } # it is a ternary - no special processing for these yet else { } $len = 0; $last_nonblank_type = $type; next; } #---------------------------- # Handle non-container tokens #---------------------------- my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_]; # Count lengths of things like 'xx => yy' as a single item if ( $type eq '=>' ) { $len += $token_length + 1; # fix $len for -naws, issue b1457 if ( !$rOpts_add_whitespace ) { if ( defined( $rLL->[ $KK + 1 ] ) && $rLL->[ $KK + 1 ]->[_TYPE_] ne 'b' ) { $len -= 1; } } if ( $len > $max_prong_len ) { $max_prong_len = $len } } elsif ( $last_nonblank_type eq '=>' ) { $len += $token_length; if ( $len > $max_prong_len ) { $max_prong_len = $len } # but only include one => per item $len = $token_length; } # include everything to end of line after a here target elsif ( $type eq 'h' ) { $len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_]; if ( $len > $max_prong_len ) { $max_prong_len = $len } } # for everything else just use the token length else { $len = $token_length; if ( $len > $max_prong_len ) { $max_prong_len = $len } } $last_nonblank_type = $type; } ## end loop over tokens on this line return; } ## end sub xlp_collapse_lengths_inner_loop } ## end closure xlp_collapsed_lengths sub is_excluded_lp { # Decide if this container is excluded by user request: # returns true if this token is excluded (i.e., may not use -lp) # returns false otherwise # The control hash can either describe: # what to exclude: $line_up_parentheses_control_is_lxpl = 1, or # what to include: $line_up_parentheses_control_is_lxpl = 0 # Input parameter: # $KK = index of the container opening token my ( $self, $KK ) = @_; my $rLL = $self->[_rLL_]; my $rtoken_vars = $rLL->[$KK]; my $token = $rtoken_vars->[_TOKEN_]; my $rflags = $line_up_parentheses_control_hash{$token}; #----------------------------------------------- # TEST #1: check match to listed container types #----------------------------------------------- if ( !defined($rflags) ) { # There is no entry for this container, so we are done return !$line_up_parentheses_control_is_lxpl; } my ( $flag1, $flag2 ) = @{$rflags}; #----------------------------------------------------------- # TEST #2: check match to flag1, the preceding nonblank word #----------------------------------------------------------- my $match_flag1 = !defined($flag1) || $flag1 eq '*'; if ( !$match_flag1 ) { # Find the previous token my ( $is_f, $is_k, $is_w ); my $Kp = $self->K_previous_nonblank($KK); if ( defined($Kp) ) { my $type_p = $rLL->[$Kp]->[_TYPE_]; my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; # keyword? $is_k = $type_p eq 'k'; # function call? $is_f = $self->[_ris_function_call_paren_]->{$seqno}; # either keyword or function call? $is_w = $is_k || $is_f; } # Check for match based on flag1 and the previous token: if ( $flag1 eq 'k' ) { $match_flag1 = $is_k } elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k } elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f } elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f } elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w } elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w } else { ## no match } } # See if we can exclude this based on the flag1 test... if ($line_up_parentheses_control_is_lxpl) { return 1 if ($match_flag1); } else { return 1 if ( !$match_flag1 ); } #------------------------------------------------------------- # TEST #3: exclusion based on flag2 and the container contents #------------------------------------------------------------- # Note that this is an exclusion test for both -lpxl or -lpil input methods # The options are: # 0 or blank: ignore container contents # 1 exclude non-lists or lists with sublists # 2 same as 1 but also exclude lists with code blocks my $match_flag2; if ($flag2) { my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_]; my $is_list = $self->[_ris_list_by_seqno_]->{$seqno}; my $has_list = $self->[_rhas_list_]->{$seqno}; my $has_code_block = $self->[_rhas_code_block_]->{$seqno}; my $has_ternary = $self->[_rhas_ternary_]->{$seqno}; if ( !$is_list || $has_list || $flag2 eq '2' && ( $has_code_block || $has_ternary ) ) { $match_flag2 = 1; } } return $match_flag2; } ## end sub is_excluded_lp sub set_excluded_lp_containers { my ($self) = @_; return unless ($rOpts_line_up_parentheses); my $rLL = $self->[_rLL_]; return unless ( defined($rLL) && @{$rLL} ); my $K_opening_container = $self->[_K_opening_container_]; my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; foreach my $seqno ( keys %{$K_opening_container} ) { # code blocks are always excluded by the -lp coding so we can skip them next if ( $rblock_type_of_seqno->{$seqno} ); my $KK = $K_opening_container->{$seqno}; next unless defined($KK); # see if a user exclusion rule turns off -lp for this container if ( $self->is_excluded_lp($KK) ) { $ris_excluded_lp_container->{$seqno} = 1; } } return; } ## end sub set_excluded_lp_containers ###################################### # CODE SECTION 6: Process line-by-line ###################################### sub process_all_lines { #---------------------------------------------------------- # Main loop to format all lines of a file according to type #---------------------------------------------------------- my $self = shift; my $rlines = $self->[_rlines_]; my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; my $file_writer_object = $self->[_file_writer_object_]; my $logger_object = $self->[_logger_object_]; my $vertical_aligner_object = $self->[_vertical_aligner_object_]; my $save_logfile = $self->[_save_logfile_]; # Flag to prevent blank lines when POD occurs in a format skipping sect. my $in_format_skipping_section; # set locations for blanks around long runs of keywords my $rwant_blank_line_after = $self->keyword_group_scan(); my $line_type = EMPTY_STRING; my $i_last_POD_END = -10; my $i = -1; foreach my $line_of_tokens ( @{$rlines} ) { # insert blank lines requested for keyword sequences if ( defined( $rwant_blank_line_after->{$i} ) && $rwant_blank_line_after->{$i} == 1 ) { $self->want_blank_line(); } $i++; my $last_line_type = $line_type; $line_type = $line_of_tokens->{_line_type}; my $input_line = $line_of_tokens->{_line_text}; # _line_type codes are: # SYSTEM - system-specific code before hash-bang line # CODE - line of perl code (including comments) # POD_START - line starting pod, such as '=head' # POD - pod documentation text # POD_END - last line of pod section, '=cut' # HERE - text of here-document # HERE_END - last line of here-doc (target word) # FORMAT - format section # FORMAT_END - last line of format section, '.' # SKIP - code skipping section # SKIP_END - last line of code skipping section, '#>>V' # DATA_START - __DATA__ line # DATA - unidentified text following __DATA__ # END_START - __END__ line # END - unidentified text following __END__ # ERROR - we are in big trouble, probably not a perl script # put a blank line after an =cut which comes before __END__ and __DATA__ # (required by podchecker) if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) { $i_last_POD_END = $i; $file_writer_object->reset_consecutive_blank_lines(); if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) { $self->want_blank_line(); } } # handle line of code.. if ( $line_type eq 'CODE' ) { my $CODE_type = $line_of_tokens->{_code_type}; $in_format_skipping_section = $CODE_type eq 'FS'; # Handle blank lines if ( $CODE_type eq 'BL' ) { # Keep this blank? Start with the flag -kbl=n, where # n=0 ignore all old blank lines # n=1 stable: keep old blanks, but limited by -mbl=n # n=2 keep all old blank lines, regardless of -mbl=n # If n=0 we delete all old blank lines and let blank line # rules generate any needed blank lines. my $kgb_keep = $rOpts_keep_old_blank_lines; # Then delete lines requested by the keyword-group logic if # allowed if ( $kgb_keep == 1 && defined( $rwant_blank_line_after->{$i} ) && $rwant_blank_line_after->{$i} == 2 ) { $kgb_keep = 0; } # But always keep a blank line following an =cut if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) { $kgb_keep = 1; } if ($kgb_keep) { $self->flush($CODE_type); $file_writer_object->write_blank_code_line( $rOpts_keep_old_blank_lines == 2 ); $self->[_last_line_leading_type_] = 'b'; } next; } else { # Let logger see all non-blank lines of code. This is a slow # operation so we avoid it if it is not going to be saved. if ( $save_logfile && $logger_object ) { $logger_object->black_box( $line_of_tokens, $vertical_aligner_object->get_output_line_number ); } } # Handle Format Skipping (FS) and Verbatim (VB) Lines if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) { $self->write_unindented_line($input_line); $file_writer_object->reset_consecutive_blank_lines(); next; } # Handle all other lines of code $self->process_line_of_CODE($line_of_tokens); } # handle line of non-code.. else { # set special flags my $skip_line = 0; if ( substr( $line_type, 0, 3 ) eq 'POD' ) { # Pod docs should have a preceding blank line. But stay # out of __END__ and __DATA__ sections, because # the user may be using this section for any purpose whatsoever if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } if ( $rOpts->{'trim-pod'} ) { chomp $input_line; $input_line =~ s/\s+$//; $input_line .= "\n"; } if ( !$skip_line && !$in_format_skipping_section && $line_type eq 'POD_START' && !$self->[_saw_END_or_DATA_] ) { $self->want_blank_line(); } } # leave the blank counters in a predictable state # after __END__ or __DATA__ elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) { $file_writer_object->reset_consecutive_blank_lines(); $self->[_saw_END_or_DATA_] = 1; } # Patch to avoid losing blank lines after a code-skipping block; # fixes case c047. elsif ( $line_type eq 'SKIP_END' ) { $file_writer_object->reset_consecutive_blank_lines(); } else { ## some other line type } # write unindented non-code line if ( !$skip_line ) { $self->write_unindented_line($input_line); } } } return; } ## end sub process_all_lines { ## closure keyword_group_scan # this is the return var my $rhash_of_desires; # user option variables for -kgb my ( $rOpts_kgb_after, $rOpts_kgb_before, $rOpts_kgb_delete, $rOpts_kgb_inside, $rOpts_kgb_size_max, $rOpts_kgb_size_min, ); # group variables, initialized by kgb_initialize_group_vars my ( $ibeg, $iend, $count, $level_beg, $K_closing ); my ( @iblanks, @group, @subgroup ); # line variables, updated by sub keyword_group_scan my ( $line_type, $CODE_type, $K_first, $K_last ); my $number_of_groups_seen; #------------------------ # -kgb helper subroutines #------------------------ sub kgb_initialize_options { # check and initialize user options for -kgb # return error flag: # true for some input error, do not continue # false if ok # Local copies of the various control parameters $rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba' $rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb' $rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd' $rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi' # A range of sizes can be input with decimal notation like 'min.max' # with any number of dots between the two numbers. Examples: # string => min max matches # 1.1 1 1 exactly 1 # 1.3 1 3 1,2, or 3 # 1..3 1 3 1,2, or 3 # 5 5 - 5 or more # 6. 6 - 6 or more # .2 - 2 up to 2 # 1.0 1 0 nothing my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs' ( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/, $rOpts_kgb_size; if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/ || $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ ) { Warn(<<EOM); Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max'; ignoring all -kgb flags EOM # Turn this option off so that this message does not keep repeating # during iterations and other files. $rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING; return $rhash_of_desires; } $rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min); if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min ) { return $rhash_of_desires; } # check codes for $rOpts_kgb_before and # $rOpts_kgb_after: # 0 = never (delete if exist) # 1 = stable (keep unchanged) # 2 = always (insert if missing) my $ok = $rOpts_kgb_size_min > 0 && ( $rOpts_kgb_before != 1 || $rOpts_kgb_after != 1 || $rOpts_kgb_inside || $rOpts_kgb_delete ); return $rhash_of_desires if ( !$ok ); return; } ## end sub kgb_initialize_options sub kgb_initialize_group_vars { # Definitions: # $ibeg = first line index of this entire group # $iend = last line index of this entire group # $count = total number of keywords seen in this entire group # $level_beg = indentation level of this group # @group = [ $i, $token, $count ] =list of all keywords & blanks # @subgroup = $j, index of group where token changes # @iblanks = line indexes of blank lines in input stream in this group # where i=starting line index # token (the keyword) # count = number of this token in this subgroup # j = index in group where token changes $ibeg = -1; $iend = undef; $level_beg = -1; $K_closing = undef; $count = 0; @group = (); @subgroup = (); @iblanks = (); return; } ## end sub kgb_initialize_group_vars sub kgb_initialize_line_vars { $CODE_type = EMPTY_STRING; $K_first = undef; $K_last = undef; $line_type = EMPTY_STRING; return; } ## end sub kgb_initialize_line_vars sub kgb_initialize { # initialize all closure variables for -kgb # return: # true to cause immediate exit (something is wrong) # false to continue ... all is okay # This is the return variable: $rhash_of_desires = {}; # initialize and check user options; my $quit = kgb_initialize_options(); if ($quit) { return $quit } # initialize variables for the current group and subgroups: kgb_initialize_group_vars(); # initialize variables for the most recently seen line: kgb_initialize_line_vars(); $number_of_groups_seen = 0; # all okay return; } ## end sub kgb_initialize sub kgb_insert_blank_after { my ($i) = @_; $rhash_of_desires->{$i} = 1; my $ip = $i + 1; if ( defined( $rhash_of_desires->{$ip} ) && $rhash_of_desires->{$ip} == 2 ) { $rhash_of_desires->{$ip} = 0; } return; } ## end sub kgb_insert_blank_after sub kgb_split_into_sub_groups { # place blanks around long sub-groups of keywords # ...if requested return unless ($rOpts_kgb_inside); # loop over sub-groups, index k push @subgroup, scalar @group; my $kbeg = 1; my $kend = @subgroup - 1; foreach my $k ( $kbeg .. $kend ) { # index j runs through all keywords found my $j_b = $subgroup[ $k - 1 ]; my $j_e = $subgroup[$k] - 1; # index i is the actual line number of a keyword my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] }; my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] }; my $num = $count_e - $count_b + 1; # This subgroup runs from line $ib to line $ie-1, but may contain # blank lines if ( $num >= $rOpts_kgb_size_min ) { # if there are blank lines, we require that at least $num lines # be non-blank up to the boundary with the next subgroup. my $nog_b = my $nog_e = 1; if ( @iblanks && !$rOpts_kgb_delete ) { my $j_bb = $j_b + $num - 1; my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] }; $nog_b = $count_bb - $count_b + 1 == $num; my $j_ee = $j_e - ( $num - 1 ); my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] }; $nog_e = $count_e - $count_ee + 1 == $num; } if ( $nog_b && $k > $kbeg ) { kgb_insert_blank_after( $i_b - 1 ); } if ( $nog_e && $k < $kend ) { my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] }; kgb_insert_blank_after( $i_ep - 1 ); } } } return; } ## end sub kgb_split_into_sub_groups sub kgb_delete_if_blank { my ( $self, $i ) = @_; # delete line $i if it is blank my $rlines = $self->[_rlines_]; return if ( $i < 0 || $i >= @{$rlines} ); return if ( $rlines->[$i]->{_line_type} ne 'CODE' ); my $code_type = $rlines->[$i]->{_code_type}; if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; } return; } ## end sub kgb_delete_if_blank sub kgb_delete_inner_blank_lines { # always remove unwanted trailing blank lines from our list return unless (@iblanks); while ( my $ibl = pop(@iblanks) ) { if ( $ibl < $iend ) { push @iblanks, $ibl; last } $iend = $ibl; } # now mark mark interior blank lines for deletion if requested return unless ($rOpts_kgb_delete); while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 } return; } ## end sub kgb_delete_inner_blank_lines sub kgb_end_group { # end a group of keywords my ( $self, $bad_ending ) = @_; if ( defined($ibeg) && $ibeg >= 0 ) { # then handle sufficiently large groups if ( $count >= $rOpts_kgb_size_min ) { $number_of_groups_seen++; # do any blank deletions regardless of the count kgb_delete_inner_blank_lines(); my $rlines = $self->[_rlines_]; if ( $ibeg > 0 ) { my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type}; # patch for hash bang line which is not currently marked as # a comment; mark it as a comment if ( $ibeg == 1 && !$code_type ) { my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text}; $code_type = 'BC' if ( $line_text && $line_text =~ /^#/ ); } # Do not insert a blank after a comment # (this could be subject to a flag in the future) if ( $code_type !~ /(?:BC|SBC|SBCX)/ ) { if ( $rOpts_kgb_before == INSERT ) { kgb_insert_blank_after( $ibeg - 1 ); } elsif ( $rOpts_kgb_before == DELETE ) { $self->kgb_delete_if_blank( $ibeg - 1 ); } else { ## == STABLE } } } # We will only put blanks before code lines. We could loosen # this rule a little, but we have to be very careful because # for example we certainly don't want to drop a blank line # after a line like this: # my $var = <<EOM; if ( $line_type eq 'CODE' && defined($K_first) ) { # - Do not put a blank before a line of different level # - Do not put a blank line if we ended the search badly # - Do not put a blank at the end of the file # - Do not put a blank line before a hanging side comment my $rLL = $self->[_rLL_]; my $level = $rLL->[$K_first]->[_LEVEL_]; my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; if ( $level == $level_beg && $ci_level == 0 && !$bad_ending && $iend < @{$rlines} && $CODE_type ne 'HSC' ) { if ( $rOpts_kgb_after == INSERT ) { kgb_insert_blank_after($iend); } elsif ( $rOpts_kgb_after == DELETE ) { $self->kgb_delete_if_blank( $iend + 1 ); } else { ## == STABLE } } } } kgb_split_into_sub_groups(); } # reset for another group kgb_initialize_group_vars(); return; } ## end sub kgb_end_group sub kgb_find_container_end { # If the keyword line is continued onto subsequent lines, find the # closing token '$K_closing' so that we can easily skip past the # contents of the container. # We only set this value if we find a simple list, meaning # -contents only one level deep # -not welded my ($self) = @_; # First check: skip if next line is not one deeper my $Knext_nonblank = $self->K_next_nonblank($K_last); return if ( !defined($Knext_nonblank) ); my $rLL = $self->[_rLL_]; my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_]; return if ( $level_next != $level_beg + 1 ); # Find the parent container of the first token on the next line my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank); return unless ( defined($parent_seqno) ); # Must not be a weld (can be unstable) return if ( $total_weld_count && $self->is_welded_at_seqno($parent_seqno) ); # Opening container must exist and be on this line my $Ko = $self->[_K_opening_container_]->{$parent_seqno}; return if ( !defined($Ko) || $Ko <= $K_first || $Ko > $K_last ); # Verify that the closing container exists and is on a later line my $Kc = $self->[_K_closing_container_]->{$parent_seqno}; return if ( !defined($Kc) || $Kc <= $K_last ); # That's it $K_closing = $Kc; return; } ## end sub kgb_find_container_end sub kgb_add_to_group { my ( $self, $i, $token, $level ) = @_; # End the previous group if we have reached the maximum # group size if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) { $self->kgb_end_group(); } if ( @group == 0 ) { $ibeg = $i; $level_beg = $level; $count = 0; } $count++; $iend = $i; # New sub-group? if ( !@group || $token ne $group[-1]->[1] ) { push @subgroup, scalar(@group); } push @group, [ $i, $token, $count ]; # remember if this line ends in an open container $self->kgb_find_container_end(); return; } ## end sub kgb_add_to_group #--------------------- # -kgb main subroutine #--------------------- sub keyword_group_scan { my $self = shift; # Called once per file to process --keyword-group-blanks-* parameters. # Task: # Manipulate blank lines around keyword groups (kgb* flags) # Scan all lines looking for runs of consecutive lines beginning with # selected keywords. Example keywords are 'my', 'our', 'local', ... but # they may be anything. We will set flags requesting that blanks be # inserted around and within them according to input parameters. Note # that we are scanning the lines as they came in in the input stream, so # they are not necessarily well formatted. # Returns: # The output of this sub is a return hash ref whose keys are the indexes # of lines after which we desire a blank line. For line index $i: # $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i # $rhash_of_desires->{$i} = 2 means we want blank line $i removed # Nothing to do if no blanks can be output. This test added to fix # case b760. if ( !$rOpts_maximum_consecutive_blank_lines ) { return $rhash_of_desires; } #--------------- # initialization #--------------- my $quit = kgb_initialize(); if ($quit) { return $rhash_of_desires } my $rLL = $self->[_rLL_]; my $rlines = $self->[_rlines_]; $self->kgb_end_group(); my $i = -1; my $Opt_repeat_count = $rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr' #---------------------------------- # loop over all lines of the source #---------------------------------- foreach my $line_of_tokens ( @{$rlines} ) { $i++; last if ( $Opt_repeat_count > 0 && $number_of_groups_seen >= $Opt_repeat_count ); kgb_initialize_line_vars(); $line_type = $line_of_tokens->{_line_type}; # always end a group at non-CODE if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next } $CODE_type = $line_of_tokens->{_code_type}; # end any group at a format skipping line if ( $CODE_type && $CODE_type eq 'FS' ) { $self->kgb_end_group(); next; } # continue in a verbatim (VB) type; it may be quoted text if ( $CODE_type eq 'VB' ) { if ( $ibeg >= 0 ) { $iend = $i; } next; } # and continue in blank (BL) types if ( $CODE_type eq 'BL' ) { if ( $ibeg >= 0 ) { $iend = $i; push @{iblanks}, $i; # propagate current subgroup token my $tok = $group[-1]->[1]; push @group, [ $i, $tok, $count ]; } next; } # examine the first token of this line my $rK_range = $line_of_tokens->{_rK_range}; ( $K_first, $K_last ) = @{$rK_range}; if ( !defined($K_first) ) { # Somewhat unexpected blank line.. # $rK_range is normally defined for line type CODE, but this can # happen for example if the input line was a single semicolon # which is being deleted. In that case there was code in the # input file but it is not being retained. So we can silently # return. return $rhash_of_desires; } my $level = $rLL->[$K_first]->[_LEVEL_]; my $type = $rLL->[$K_first]->[_TYPE_]; my $token = $rLL->[$K_first]->[_TOKEN_]; my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_]; # End a group 'badly' at an unexpected level. This will prevent # blank lines being incorrectly placed after the end of the group. # We are looking for any deviation from two acceptable patterns: # PATTERN 1: a simple list; secondary lines are at level+1 # PATTERN 2: a long statement; all secondary lines same level # This was added as a fix for case b1177, in which a complex # structure got incorrectly inserted blank lines. if ( $ibeg >= 0 ) { # Check for deviation from PATTERN 1, simple list: if ( defined($K_closing) && $K_first < $K_closing ) { $self->kgb_end_group(1) if ( $level != $level_beg + 1 ); } # Check for deviation from PATTERN 2, single statement: elsif ( $level != $level_beg ) { $self->kgb_end_group(1) } else { ## no deviation } } # Do not look for keywords in lists ( keyword 'my' can occur in # lists, see case b760); fixed for c048. if ( $self->is_list_by_K($K_first) ) { if ( $ibeg >= 0 ) { $iend = $i } next; } # see if this is a code type we seek (i.e. comment) if ( $CODE_type && $keyword_group_list_comment_pattern && $CODE_type =~ /$keyword_group_list_comment_pattern/ ) { my $tok = $CODE_type; # Continuing a group if ( $ibeg >= 0 && $level == $level_beg ) { $self->kgb_add_to_group( $i, $tok, $level ); } # Start new group else { # first end old group if any; we might be starting new # keywords at different level if ( $ibeg >= 0 ) { $self->kgb_end_group(); } $self->kgb_add_to_group( $i, $tok, $level ); } next; } # See if it is a keyword we seek, but never start a group in a # continuation line; the code may be badly formatted. if ( $ci_level == 0 && $type eq 'k' && $token =~ /$keyword_group_list_pattern/ ) { # Continuing a keyword group if ( $ibeg >= 0 && $level == $level_beg ) { $self->kgb_add_to_group( $i, $token, $level ); } # Start new keyword group else { # first end old group if any; we might be starting new # keywords at different level if ( $ibeg >= 0 ) { $self->kgb_end_group(); } $self->kgb_add_to_group( $i, $token, $level ); } next; } # This is not one of our keywords, but we are in a keyword group # so see if we should continue or quit elsif ( $ibeg >= 0 ) { # - bail out on a large level change; we may have walked into a # data structure or anonymous sub code. if ( $level > $level_beg + 1 || $level < $level_beg ) { $self->kgb_end_group(1); next; } # - keep going on a continuation line of the same level, since # it is probably a continuation of our previous keyword, # - and keep going past hanging side comments because we never # want to interrupt them. if ( ( ( $level == $level_beg ) && $ci_level > 0 ) || $CODE_type eq 'HSC' ) { $iend = $i; next; } # - continue if if we are within in a container which started # with the line of the previous keyword. if ( defined($K_closing) && $K_first <= $K_closing ) { # continue if entire line is within container if ( $K_last <= $K_closing ) { $iend = $i; next } # continue at ); or }; or ]; my $KK = $K_closing + 1; if ( $rLL->[$KK]->[_TYPE_] eq ';' ) { if ( $KK < $K_last ) { if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK } if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) { $self->kgb_end_group(1); next; } } $iend = $i; next; } $self->kgb_end_group(1); next; } # - end the group if none of the above $self->kgb_end_group(); next; } # not in a keyword group; continue else { next } } ## end of loop over all lines $self->kgb_end_group(); return $rhash_of_desires; } ## end sub keyword_group_scan } ## end closure keyword_group_scan ####################################### # CODE SECTION 7: Process lines of code ####################################### { ## begin closure process_line_of_CODE # The routines in this closure receive lines of code and combine them into # 'batches' and send them along. A 'batch' is the unit of code which can be # processed further as a unit. It has the property that it is the largest # amount of code into which which perltidy is free to place one or more # line breaks within it without violating any constraints. # When a new batch is formed it is sent to sub 'grind_batch_of_code'. # flags needed by the store routine my $line_of_tokens; my $no_internal_newlines; my $CODE_type; my $current_line_starts_in_quote; # range of K of tokens for the current line my ( $K_first, $K_last ); my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno, $rblock_type_of_seqno, $ri_starting_one_line_block ); # past stored nonblank tokens and flags my ( $K_last_nonblank_code, $K_dangling_elsif, $is_static_block_comment, $last_CODE_type, $last_line_had_side_comment, $next_parent_seqno, $next_slevel, ); # Called once at the start of a new file sub initialize_process_line_of_CODE { $K_last_nonblank_code = undef; $K_dangling_elsif = 0; $is_static_block_comment = 0; $last_line_had_side_comment = 0; $next_parent_seqno = SEQ_ROOT; $next_slevel = undef; return; } ## end sub initialize_process_line_of_CODE # Batch variables: these describe the current batch of code being formed # and sent down the pipeline. They are initialized in the next # sub. my ( $rbrace_follower, $index_start_one_line_block, $starting_in_quote, $ending_in_quote, ); # Called before the start of each new batch sub initialize_batch_variables { # Initialize array values for a new batch. Any changes here must be # carefully coordinated with sub store_token_to_go. $max_index_to_go = UNDEFINED_INDEX; $summed_lengths_to_go[0] = 0; $nesting_depth_to_go[0] = 0; $ri_starting_one_line_block = []; # Redefine some sparse arrays. # It is more efficient to redefine these sparse arrays and rely on # undef's instead of initializing to 0's. Testing showed that using # @array=() is more efficient than $#array=-1 @old_breakpoint_to_go = (); @forced_breakpoint_to_go = (); @block_type_to_go = (); @mate_index_to_go = (); @type_sequence_to_go = (); # NOTE: @nobreak_to_go is sparse and could be treated this way, but # testing showed that there would be very little efficiency gain # because an 'if' test must be added in store_token_to_go. # The initialization code for the remaining batch arrays is as follows # and can be activated for testing. But profiling shows that it is # time-consuming to re-initialize the batch arrays and is not necessary # because the maximum valid token, $max_index_to_go, is carefully # controlled. This means however that it is not possible to do any # type of filter or map operation directly on these arrays. And it is # not possible to use negative indexes. As a precaution against program # changes which might do this, sub pad_array_to_go adds some undefs at # the end of the current batch of data. ## 0 && do { #<<< ## @nobreak_to_go = (); ## @token_lengths_to_go = (); ## @levels_to_go = (); ## @ci_levels_to_go = (); ## @tokens_to_go = (); ## @K_to_go = (); ## @types_to_go = (); ## @leading_spaces_to_go = (); ## @reduced_spaces_to_go = (); ## @inext_to_go = (); ## @parent_seqno_to_go = (); ## }; $rbrace_follower = undef; $ending_in_quote = 0; $index_start_one_line_block = undef; # initialize forced breakpoint vars associated with each output batch $forced_breakpoint_count = 0; $index_max_forced_break = UNDEFINED_INDEX; $forced_breakpoint_undo_count = 0; return; } ## end sub initialize_batch_variables sub leading_spaces_to_go { # return the number of indentation spaces for a token in the output # stream my ($ii) = @_; return 0 if ( $ii < 0 ); my $indentation = $leading_spaces_to_go[$ii]; return ref($indentation) ? $indentation->get_spaces() : $indentation; } ## end sub leading_spaces_to_go sub create_one_line_block { # set index starting next one-line block # call with no args to delete the current one-line block ($index_start_one_line_block) = @_; return; } ## end sub create_one_line_block # Routine to place the current token into the output stream. # Called once per output token. use constant DEBUG_STORE => 0; sub store_token_to_go { my ( $self, $Ktoken_vars, $rtoken_vars ) = @_; #------------------------------------------------------- # Token storage utility for sub process_line_of_CODE. # Add one token to the next batch of '_to_go' variables. #------------------------------------------------------- # Input parameters: # $Ktoken_vars = the index K in the global token array # $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values # unless they are temporarily being overridden #------------------------------------------------------------------ # NOTE: called once per token so coding efficiency is critical here. # All changes need to be benchmarked with Devel::NYTProf. #------------------------------------------------------------------ my ( $type, $token, $ci_level, $level, $seqno, $length, ) = @{$rtoken_vars}[ _TYPE_, _TOKEN_, _CI_LEVEL_, _LEVEL_, _TYPE_SEQUENCE_, _TOKEN_LENGTH_, ]; # Check for emergency flush... # The K indexes in the batch must always be a continuous sequence of # the global token array. The batch process programming assumes this. # If storing this token would cause this relation to fail we must dump # the current batch before storing the new token. It is extremely rare # for this to happen. One known example is the following two-line # snippet when run with parameters # --noadd-newlines --space-terminal-semicolon: # if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ; # $yy=1; if ( $max_index_to_go >= 0 ) { if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) { $self->flush_batch_of_CODE(); } # Do not output consecutive blank tokens ... this should not # happen, but it is worth checking. Later code can then make the # simplifying assumption that blank tokens are not consecutive. elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) { if (DEVEL_MODE) { # if this happens, it is may be that consecutive blanks # were inserted into the token stream in 'respace_tokens' my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; Fault("consecutive blanks near line $lno; please fix"); } return; } else { ## all ok } } # Do not start a batch with a blank token. # Fixes cases b149 b888 b984 b985 b986 b987 else { if ( $type eq 'b' ) { return } } # Update counter and do initializations if first token of new batch if ( !++$max_index_to_go ) { # Reset flag '$starting_in_quote' for a new batch. It must be set # to the value of '$in_continued_quote', but here for efficiency we # set it to zero, which is its normal value. Then in coding below # we will change it if we find we are actually in a continued quote. $starting_in_quote = 0; # Update the next parent sequence number for each new batch. #---------------------------------------- # Begin coding from sub parent_seqno_by_K #---------------------------------------- # The following is equivalent to this call but much faster: # $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars); $next_parent_seqno = SEQ_ROOT; if ($seqno) { $next_parent_seqno = $rparent_of_seqno->{$seqno}; } else { my $Kt = $rLL->[$Ktoken_vars]->[_KNEXT_SEQ_ITEM_]; if ( defined($Kt) ) { my $type_sequence_t = $rLL->[$Kt]->[_TYPE_SEQUENCE_]; my $type_t = $rLL->[$Kt]->[_TYPE_]; # if next container token is closing, it is the parent seqno if ( $is_closing_type{$type_t} ) { $next_parent_seqno = $type_sequence_t; } # otherwise we want its parent container else { $next_parent_seqno = $rparent_of_seqno->{$type_sequence_t}; } } } $next_parent_seqno = SEQ_ROOT if ( !defined($next_parent_seqno) ); #-------------------------------------- # End coding from sub parent_seqno_by_K #-------------------------------------- $next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1; } # Clip levels to zero if there are level errors in the file. # We had to wait until now for reasons explained in sub 'write_line'. if ( $level < 0 ) { $level = 0 } # Safety check that length is defined. This is slow and should not be # needed now, so just do it in DEVEL_MODE to check programming changes. # Formerly needed for --indent-only, in which the entire set of tokens # is normally turned into type 'q'. Lengths are now defined in sub # 'respace_tokens' so this check is no longer needed. if ( DEVEL_MODE && !defined($length) ) { my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; $length = length($token); Fault(<<EOM); undefined length near line $lno; num chars=$length, token='$token' EOM } #---------------------------- # add this token to the batch #---------------------------- $K_to_go[$max_index_to_go] = $Ktoken_vars; $types_to_go[$max_index_to_go] = $type; $tokens_to_go[$max_index_to_go] = $token; $ci_levels_to_go[$max_index_to_go] = $ci_level; $levels_to_go[$max_index_to_go] = $level; $nobreak_to_go[$max_index_to_go] = $no_internal_newlines; $token_lengths_to_go[$max_index_to_go] = $length; # Skip point initialization for these sparse arrays - undef's okay; # See also related code in sub initialize_batch_variables. ## $old_breakpoint_to_go[$max_index_to_go] = 0; ## $forced_breakpoint_to_go[$max_index_to_go] = 0; ## $block_type_to_go[$max_index_to_go] = EMPTY_STRING; ## $type_sequence_to_go[$max_index_to_go] = $seqno; # NOTE: nobreak_to_go can be treated as a sparse array, but testing # showed that there is almost no efficiency gain because an if test # would need to be added. # We keep a running sum of token lengths from the start of this batch: # summed_lengths_to_go[$i] = total length to just before token $i # summed_lengths_to_go[$i+1] = total length to just after token $i $summed_lengths_to_go[ $max_index_to_go + 1 ] = $summed_lengths_to_go[$max_index_to_go] + $length; # Initialize some sequence-dependent variables to their normal values $parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno; $nesting_depth_to_go[$max_index_to_go] = $next_slevel; # Then fix them at container tokens: if ($seqno) { $type_sequence_to_go[$max_index_to_go] = $seqno; $block_type_to_go[$max_index_to_go] = $rblock_type_of_seqno->{$seqno}; if ( $is_opening_token{$token} ) { my $slevel = $rdepth_of_opening_seqno->[$seqno]; $nesting_depth_to_go[$max_index_to_go] = $slevel; $next_slevel = $slevel + 1; $next_parent_seqno = $seqno; } elsif ( $is_closing_token{$token} ) { $next_slevel = $rdepth_of_opening_seqno->[$seqno]; my $slevel = $next_slevel + 1; $nesting_depth_to_go[$max_index_to_go] = $slevel; my $parent_seqno = $rparent_of_seqno->{$seqno}; $parent_seqno = SEQ_ROOT unless defined($parent_seqno); $parent_seqno_to_go[$max_index_to_go] = $parent_seqno; $next_parent_seqno = $parent_seqno; } else { # ternary token: nothing to do } } # Define the indentation that this token will have in two cases: # Without CI = reduced_spaces_to_go # With CI = leading_spaces_to_go $leading_spaces_to_go[$max_index_to_go] = $reduced_spaces_to_go[$max_index_to_go] = $rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars]; if ($ci_level) { $leading_spaces_to_go[$max_index_to_go] += $rOpts_continuation_indentation; } # Correct these values if we are starting in a continued quote if ( $current_line_starts_in_quote && $Ktoken_vars == $K_first ) { # in a continued quote - correct value set above if first token if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 } $leading_spaces_to_go[$max_index_to_go] = 0; $reduced_spaces_to_go[$max_index_to_go] = 0; } DEBUG_STORE && do { my ( $a, $b, $c ) = caller(); print {*STDOUT} "STORE: from $a $c: storing token $token type $type lev=$level at $max_index_to_go\n"; }; return; } ## end sub store_token_to_go sub flush_batch_of_CODE { # Finish and process the current batch. # This must be the only call to grind_batch_of_CODE() my ($self) = @_; # If a batch has been started ... if ( $max_index_to_go >= 0 ) { # Create an array to hold variables for this batch my $this_batch = []; $this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote); $this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote); if ( $CODE_type || $last_CODE_type ) { $this_batch->[_batch_CODE_type_] = $K_to_go[$max_index_to_go] >= $K_first ? $CODE_type : $last_CODE_type; } $last_line_had_side_comment = ( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' ); # The flag $is_static_block_comment applies to the line which just # arrived. So it only applies if we are outputting that line. if ( $is_static_block_comment && !$last_line_had_side_comment ) { $this_batch->[_is_static_block_comment_] = $K_to_go[0] == $K_first; } $this_batch->[_ri_starting_one_line_block_] = $ri_starting_one_line_block; $self->[_this_batch_] = $this_batch; #------------------- # process this batch #------------------- $self->grind_batch_of_CODE(); # Done .. this batch is history $self->[_this_batch_] = undef; initialize_batch_variables(); } return; } ## end sub flush_batch_of_CODE sub end_batch { # End the current batch, EXCEPT for a few special cases my ($self) = @_; if ( $max_index_to_go < 0 ) { # nothing to do .. this is harmless but wastes time. if (DEVEL_MODE) { Fault("sub end_batch called with nothing to do; please fix\n"); } return; } # Exceptions when a line does not end with a comment... (fixes c058) if ( $types_to_go[$max_index_to_go] ne '#' ) { # Exception 1: Do not end line in a weld return if ( $total_weld_count && $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } ); # Exception 2: just set a tentative breakpoint if we might be in a # one-line block if ( defined($index_start_one_line_block) ) { $self->set_forced_breakpoint($max_index_to_go); return; } } $self->flush_batch_of_CODE(); return; } ## end sub end_batch sub flush_vertical_aligner { my ($self) = @_; my $vao = $self->[_vertical_aligner_object_]; $vao->flush(); return; } ## end sub flush_vertical_aligner # flush is called to output any tokens in the pipeline, so that # an alternate source of lines can be written in the correct order sub flush { my ( $self, $CODE_type_flush ) = @_; # end the current batch with 1 exception $index_start_one_line_block = undef; # Exception: if we are flushing within the code stream only to insert # blank line(s), then we can keep the batch intact at a weld. This # improves formatting of -ce. See test 'ce1.ce' if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) { $self->end_batch() if ( $max_index_to_go >= 0 ); } # otherwise, we have to shut things down completely. else { $self->flush_batch_of_CODE() } $self->flush_vertical_aligner(); return; } ## end sub flush my %is_assignment_or_fat_comma; BEGIN { %is_assignment_or_fat_comma = %is_assignment; $is_assignment_or_fat_comma{'=>'} = 1; } sub add_missing_else { # Add a missing 'else' block. # $K_dangling_elsif = index of closing elsif brace not followed by else my ($self) = @_; # Make sure everything looks okay if ( !$K_dangling_elsif || $K_dangling_elsif < $K_first || $rLL->[$K_dangling_elsif]->[_TYPE_] ne '}' ) { DEVEL_MODE && Fault("could not find closing elsif brace\n"); } my $comment = $rOpts->{'add-missing-else-comment'}; # Safety check if ( substr( $comment, 0, 1 ) ne '#' ) { $comment = '#' . $comment } # Calculate indentation my $level = $radjusted_levels->[$K_dangling_elsif]; my $spaces = SPACE x ( $level * $rOpts_indent_columns ); my $line1 = $spaces . "else {\n"; my $line3 = $spaces . "}\n"; $spaces .= SPACE x $rOpts_indent_columns; my $line2 = $spaces . $comment . "\n"; # clear the output pipeline $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_code_line($line1); $file_writer_object->write_code_line($line2); $file_writer_object->write_code_line($line3); return; } sub process_line_of_CODE { my ( $self, $my_line_of_tokens ) = @_; #---------------------------------------------------------------- # This routine is called once per INPUT line to format all of the # tokens on that line. #---------------------------------------------------------------- # It outputs full-line comments and blank lines immediately. # For lines of code: # - Tokens are copied one-by-one from the global token # array $rLL to a set of '_to_go' arrays which collect batches of # tokens. This is done with calls to 'store_token_to_go'. # - A batch is closed and processed upon reaching a well defined # structural break point (i.e. code block boundary) or forced # breakpoint (i.e. side comment or special user controls). # - Subsequent stages of formatting make additional line breaks # appropriate for lists and logical structures, and as necessary to # keep line lengths below the requested maximum line length. #----------------------------------- # begin initialize closure variables #----------------------------------- $line_of_tokens = $my_line_of_tokens; my $rK_range = $line_of_tokens->{_rK_range}; if ( !defined( $rK_range->[0] ) ) { # Empty line: This can happen if tokens are deleted, for example # with the -mangle parameter return; } ( $K_first, $K_last ) = @{$rK_range}; $last_CODE_type = $CODE_type; $CODE_type = $line_of_tokens->{_code_type}; $current_line_starts_in_quote = $line_of_tokens->{_starting_in_quote}; $rLL = $self->[_rLL_]; $radjusted_levels = $self->[_radjusted_levels_]; $rparent_of_seqno = $self->[_rparent_of_seqno_]; $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_]; $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; #--------------------------------- # end initialize closure variables #--------------------------------- # This flag will become nobreak_to_go and should be set to 2 to prevent # a line break AFTER the current token. $no_internal_newlines = 0; if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) { $no_internal_newlines = 2; } my $input_line = $line_of_tokens->{_line_text}; my ( $is_block_comment, $has_side_comment ); if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) { if ( $K_last == $K_first ) { $is_block_comment = 1 } else { $has_side_comment = 1 } } my $is_static_block_comment_without_leading_space = $CODE_type eq 'SBCX'; $is_static_block_comment = $CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space; # check for a $VERSION statement if ( $CODE_type eq 'VER' ) { $self->[_saw_VERSION_in_this_file_] = 1; $no_internal_newlines = 2; } # Add interline blank if any my $last_old_nonblank_type = "b"; my $first_new_nonblank_token = EMPTY_STRING; my $K_first_true = $K_first; if ( $max_index_to_go >= 0 ) { $last_old_nonblank_type = $types_to_go[$max_index_to_go]; $first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_]; if ( !$is_block_comment && $types_to_go[$max_index_to_go] ne 'b' && $K_first > 0 && $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' ) { $K_first -= 1; } } my $rtok_first = $rLL->[$K_first]; my $in_quote = $line_of_tokens->{_ending_in_quote}; $ending_in_quote = $in_quote; #------------------------------------ # Handle a block (full-line) comment. #------------------------------------ if ($is_block_comment) { if ( $rOpts->{'delete-block-comments'} ) { $self->flush(); return; } $index_start_one_line_block = undef; $self->end_batch() if ( $max_index_to_go >= 0 ); # output a blank line before block comments if ( # unless we follow a blank or comment line $self->[_last_line_leading_type_] ne '#' && $self->[_last_line_leading_type_] ne 'b' # only if allowed && $rOpts->{'blanks-before-comments'} # if this is NOT an empty comment, unless it follows a side # comment and could become a hanging side comment. && ( $rtok_first->[_TOKEN_] ne '#' || ( $last_line_had_side_comment && $rLL->[$K_first]->[_LEVEL_] > 0 ) ) # not after a short line ending in an opening token # because we already have space above this comment. # Note that the first comment in this if block, after # the 'if (', does not get a blank line because of this. && !$self->[_last_output_short_opening_token_] # never before static block comments && !$is_static_block_comment ) { $self->flush(); # switching to new output stream my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_blank_code_line(); $self->[_last_line_leading_type_] = 'b'; } if ( $rOpts->{'indent-block-comments'} && ( !$rOpts->{'indent-spaced-block-comments'} || $input_line =~ /^\s+/ ) && !$is_static_block_comment_without_leading_space ) { my $Ktoken_vars = $K_first; my $rtoken_vars = $rLL->[$Ktoken_vars]; $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); $self->end_batch(); } else { # switching to new output stream $self->flush(); # Note that last arg in call here is 'undef' for comments my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_code_line( $rtok_first->[_TOKEN_] . "\n", undef ); $self->[_last_line_leading_type_] = '#'; } return; } #-------------------------------------------- # Compare input/output indentation in logfile #-------------------------------------------- if ( $self->[_save_logfile_] ) { my $guessed_indentation_level = $line_of_tokens->{_guessed_indentation_level}; # Compare input/output indentation except for: # - hanging side comments # - continuation lines (have unknown leading blank space) # - and lines which are quotes (they may have been outdented) my $exception = $CODE_type eq 'HSC' || $rtok_first->[_CI_LEVEL_] > 0 || $guessed_indentation_level == 0 && $rtok_first->[_TYPE_] eq 'Q'; if ( !$exception ) { my $input_line_number = $line_of_tokens->{_line_number}; $self->compare_indentation_levels( $K_first, $guessed_indentation_level, $input_line_number ); } } #----------------------------------------- # Handle a line marked as indentation-only #----------------------------------------- if ( $CODE_type eq 'IO' ) { $self->flush(); my $line = $input_line; # Fix for rt #125506 Unexpected string formatting # in which leading space of a terminal quote was removed $line =~ s/\s+$//; $line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} ); my $Ktoken_vars = $K_first; # We work with a copy of the token variables and change the # first token to be the entire line as a quote variable my $rtoken_vars = $rLL->[$Ktoken_vars]; $rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line ); # Patch: length is not really important here but must be defined $rtoken_vars->[_TOKEN_LENGTH_] = length($line); $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); $self->end_batch(); return; } #--------------------------- # Handle all other lines ... #--------------------------- $K_dangling_elsif = 0; # This is a good place to kill incomplete one-line blocks if ( $max_index_to_go >= 0 ) { # For -iob and -lp, mark essential old breakpoints. # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 # See related code below. if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) { my $type_first = $rLL->[$K_first_true]->[_TYPE_]; if ( $is_assignment_or_fat_comma{$type_first} ) { $old_breakpoint_to_go[$max_index_to_go] = 1; } } if ( # this check needed -mangle (for example rt125012) ( ( !$index_start_one_line_block ) && ( $last_old_nonblank_type eq ';' ) && ( $first_new_nonblank_token ne '}' ) ) # Patch for RT #98902. Honor request to break at old commas. || ( $rOpts_break_at_old_comma_breakpoints && $last_old_nonblank_type eq ',' ) ) { $forced_breakpoint_to_go[$max_index_to_go] = 1 if ($rOpts_break_at_old_comma_breakpoints); $index_start_one_line_block = undef; $self->end_batch(); } # Keep any requested breaks before this line. Note that we have to # use the original K_first because it may have been reduced above # to add a blank. The value of the flag is as follows: # 1 => hard break, flush the batch # 2 => soft break, set breakpoint and continue building the batch # added check on max_index_to_go for c177 if ( $max_index_to_go >= 0 && $self->[_rbreak_before_Kfirst_]->{$K_first_true} ) { $index_start_one_line_block = undef; if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) { $self->set_forced_breakpoint($max_index_to_go); } else { $self->end_batch(); } } } #-------------------------------------- # loop to process the tokens one-by-one #-------------------------------------- $self->process_line_inner_loop($has_side_comment); # if there is anything left in the output buffer ... if ( $max_index_to_go >= 0 ) { my $type = $rLL->[$K_last]->[_TYPE_]; my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last}; # we have to flush .. if ( # if there is a side comment... $type eq '#' # if this line ends in a quote # NOTE: This is critically important for insuring that quoted # lines do not get processed by things like -sot and -sct || $in_quote # if this is a VERSION statement || $CODE_type eq 'VER' # to keep a label at the end of a line || ( $type eq 'J' && $rOpts_break_after_labels != 2 ) # if we have a hard break request || $break_flag && $break_flag != 2 # if we are instructed to keep all old line breaks || !$rOpts->{'delete-old-newlines'} # if this is a line of the form 'use overload'. A break here in # the input file is a good break because it will allow the # operators which follow to be formatted well. Without this # break the formatting with -ci=4 -xci is poor, for example. # use overload # '+' => sub { # print length $_[2], "\n"; # my ( $x, $y ) = _order(@_); # Number::Roman->new( int $x + $y ); # }, # '-' => sub { # my ( $x, $y ) = _order(@_); # Number::Roman->new( int $x - $y ); # }; || ( $max_index_to_go == 2 && $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' && $tokens_to_go[$max_index_to_go] eq 'overload' ) ) { $index_start_one_line_block = undef; $self->end_batch(); } else { # Check for a soft break request if ( $break_flag && $break_flag == 2 ) { $self->set_forced_breakpoint($max_index_to_go); } # mark old line breakpoints in current output stream if ( !$rOpts_ignore_old_breakpoints # Mark essential old breakpoints if combination -iob -lp is # used. These two options do not work well together, but # we can avoid turning -iob off by ignoring -iob at certain # essential line breaks. See also related code above. # Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058 || ( $rOpts_line_up_parentheses && $is_assignment_or_fat_comma{$type} ) ) { $old_breakpoint_to_go[$max_index_to_go] = 1; } } } if ( $K_dangling_elsif && $rOpts_add_missing_else ) { $self->add_missing_else(); } return; } ## end sub process_line_of_CODE sub process_line_inner_loop { my ( $self, $has_side_comment ) = @_; #-------------------------------------------------------------------- # Loop to move all tokens from one input line to a newly forming batch #-------------------------------------------------------------------- # Do not start a new batch with a blank space if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) { $K_first++; } foreach my $Ktoken_vars ( $K_first .. $K_last ) { my $rtoken_vars = $rLL->[$Ktoken_vars]; #-------------- # handle blanks #-------------- if ( $rtoken_vars->[_TYPE_] eq 'b' ) { $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); next; } #------------------ # handle non-blanks #------------------ my $type = $rtoken_vars->[_TYPE_]; # If we are continuing after seeing a right curly brace, flush # buffer unless we see what we are looking for, as in # } else ... if ($rbrace_follower) { my $token = $rtoken_vars->[_TOKEN_]; if ( !$rbrace_follower->{$token} ) { $self->end_batch() if ( $max_index_to_go >= 0 ); } $rbrace_follower = undef; } my ( $block_type, $type_sequence, $is_opening_BLOCK, $is_closing_BLOCK, $nobreak_BEFORE_BLOCK ); if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) { my $token = $rtoken_vars->[_TOKEN_]; $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_]; $block_type = $rblock_type_of_seqno->{$type_sequence}; if ( $block_type && $token eq $type && $block_type ne 't' && !$self->[_rshort_nested_]->{$type_sequence} ) { if ( $type eq '{' ) { $is_opening_BLOCK = 1; $nobreak_BEFORE_BLOCK = $no_internal_newlines; } elsif ( $type eq '}' ) { $is_closing_BLOCK = 1; $nobreak_BEFORE_BLOCK = $no_internal_newlines; } else { ## error - block should be enclosed by curly brace DEVEL_MODE && Fault(<<EOM); block type '$block_type' has unexpected container type '$type' EOM } } } #--------------------- # handle side comments #--------------------- if ($has_side_comment) { # if at last token ... if ( $Ktoken_vars == $K_last ) { $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); next; } # if before last token ... do not allow breaks which would # promote a side comment to a block comment if ( $Ktoken_vars == $K_last - 1 || $Ktoken_vars == $K_last - 2 && $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' ) { $no_internal_newlines = 2; } } # Process non-blank and non-comment tokens ... #----------------- # handle semicolon #----------------- if ( $type eq ';' ) { my $next_nonblank_token_type = 'b'; my $next_nonblank_token = EMPTY_STRING; if ( $Ktoken_vars < $K_last ) { my $Knnb = $Ktoken_vars + 1; $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; } if ( $rOpts_break_at_old_semicolon_breakpoints && ( $Ktoken_vars == $K_first ) && $max_index_to_go >= 0 && !defined($index_start_one_line_block) ) { $self->end_batch(); } $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); $self->end_batch() if ( !$no_internal_newlines && ( !$rOpts_keep_interior_semicolons || $Ktoken_vars >= $K_last ) && ( $next_nonblank_token ne '}' ) ); } #----------- # handle '{' #----------- elsif ($is_opening_BLOCK) { # Tentatively output this token. This is required before # calling starting_one_line_block. We may have to unstore # it, though, if we have to break before it. $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); # Look ahead to see if we might form a one-line block.. my $too_long = $self->starting_one_line_block( $Ktoken_vars, $K_last_nonblank_code, $K_last ); $self->clear_breakpoint_undo_stack(); # to simplify the logic below, set a flag to indicate if # this opening brace is far from the keyword which introduces it my $keyword_on_same_line = 1; if ( $max_index_to_go >= 0 && defined($K_last_nonblank_code) && $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')' && ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] ) || $too_long ) ) { $keyword_on_same_line = 0; } # Break before '{' if requested with -bl or -bli flag my $want_break = $self->[_rbrace_left_]->{$type_sequence}; # But do not break if this token is welded to the left if ( $total_weld_count && defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) ) { $want_break = 0; } # Break BEFORE an opening '{' ... if ( # if requested $want_break # and we were unable to start looking for a block, && !defined($index_start_one_line_block) # or if it will not be on same line as its keyword, so that # it will be outdented (eval.t, overload.t), and the user # has not insisted on keeping it on the right || ( !$keyword_on_same_line && !$rOpts_opening_brace_always_on_right ) ) { # but only if allowed if ( !$nobreak_BEFORE_BLOCK ) { # since we already stored this token, we must unstore it $self->unstore_token_to_go(); # then output the line $self->end_batch() if ( $max_index_to_go >= 0 ); # and now store this token at the start of a new line $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); } } # now output this line $self->end_batch() if ( $max_index_to_go >= 0 && !$no_internal_newlines ); } #----------- # handle '}' #----------- elsif ($is_closing_BLOCK) { my $next_nonblank_token_type = 'b'; my $next_nonblank_token = EMPTY_STRING; my $Knnb; if ( $Ktoken_vars < $K_last ) { $Knnb = $Ktoken_vars + 1; $Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' ); $next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_]; $next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_]; } # If there is a pending one-line block .. if ( defined($index_start_one_line_block) ) { # Fix for b1208: if a side comment follows this closing # brace then we must include its length in the length test # ... unless the -issl flag is set (fixes b1307-1309). # Assume a minimum of 1 blank space to the comment. my $added_length = 0; if ( $has_side_comment && !$rOpts_ignore_side_comment_lengths && $next_nonblank_token_type eq '#' ) { $added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_]; } # we have to terminate it if.. if ( # it is too long (final length may be different from # initial estimate). note: must allow 1 space for this # token $self->excess_line_length( $index_start_one_line_block, $max_index_to_go ) + $added_length >= 0 ) { $index_start_one_line_block = undef; } } # put a break before this closing curly brace if appropriate $self->end_batch() if ( $max_index_to_go >= 0 && !$nobreak_BEFORE_BLOCK && !defined($index_start_one_line_block) ); # store the closing curly brace $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); # ok, we just stored a closing curly brace. Often, but # not always, we want to end the line immediately. # So now we have to check for special cases. # if this '}' successfully ends a one-line block.. my $one_line_block_type = EMPTY_STRING; my $keep_going; if ( defined($index_start_one_line_block) ) { # Remember the type of token just before the # opening brace. It would be more general to use # a stack, but this will work for one-line blocks. $one_line_block_type = $types_to_go[$index_start_one_line_block]; # we have to actually make it by removing tentative # breaks that were set within it $self->undo_forced_breakpoint_stack(0); # For -lp, extend the nobreak to include a trailing # terminal ','. This is because the -lp indentation was # not known when making one-line blocks, so we may be able # to move the line back to fit. Otherwise we may create a # needlessly stranded comma on the next line. my $iend_nobreak = $max_index_to_go - 1; if ( $rOpts_line_up_parentheses && $next_nonblank_token_type eq ',' && $Knnb eq $K_last ) { my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; my $is_excluded = $self->[_ris_excluded_lp_container_]->{$p_seqno}; $iend_nobreak = $max_index_to_go if ( !$is_excluded ); } $self->set_nobreaks( $index_start_one_line_block, $iend_nobreak ); # save starting block indexes so that sub correct_lp can # check and adjust -lp indentation (c098) push @{$ri_starting_one_line_block}, $index_start_one_line_block; # then re-initialize for the next one-line block $index_start_one_line_block = undef; # then decide if we want to break after the '}' .. # We will keep going to allow certain brace followers as in: # do { $ifclosed = 1; last } unless $losing; # # But make a line break if the curly ends a # significant block: if ( ( $is_block_without_semicolon{$block_type} # Follow users break point for # one line block types U & G, such as a 'try' block || $one_line_block_type =~ /^[UG]$/ && $Ktoken_vars == $K_last ) # if needless semicolon follows we handle it later && $next_nonblank_token ne ';' ) { $self->end_batch() unless ($no_internal_newlines); } } # set string indicating what we need to look for brace follower # tokens if ( $is_if_unless_elsif_else{$block_type} ) { $rbrace_follower = undef; } elsif ( $block_type eq 'do' ) { $rbrace_follower = \%is_do_follower; if ( $self->tight_paren_follows( $K_to_go[0], $Ktoken_vars ) ) { $rbrace_follower = { ')' => 1 }; } } # added eval for borris.t elsif ($is_sort_map_grep_eval{$block_type} || $one_line_block_type eq 'G' ) { $rbrace_follower = undef; $keep_going = 1; } # anonymous sub elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) { if ($one_line_block_type) { $rbrace_follower = \%is_anon_sub_1_brace_follower; # Exceptions to help keep -lp intact, see git #74 ... # Exception 1: followed by '}' on this line if ( $Ktoken_vars < $K_last && $next_nonblank_token eq '}' ) { $rbrace_follower = undef; $keep_going = 1; } # Exception 2: followed by '}' on next line if -lp set. # The -lp requirement allows the formatting to follow # old breaks when -lp is not used, minimizing changes. # Fixes issue c087. elsif ($Ktoken_vars == $K_last && $rOpts_line_up_parentheses ) { my $K_closing_container = $self->[_K_closing_container_]; my $p_seqno = $parent_seqno_to_go[$max_index_to_go]; my $Kc = $K_closing_container->{$p_seqno}; my $is_excluded = $self->[_ris_excluded_lp_container_]->{$p_seqno}; $keep_going = ( defined($Kc) && $rLL->[$Kc]->[_TOKEN_] eq '}' && !$is_excluded && $Kc - $Ktoken_vars <= 2 ); $rbrace_follower = undef if ($keep_going); } else { ## not an exception } } else { $rbrace_follower = \%is_anon_sub_brace_follower; } } # None of the above: specify what can follow a closing # brace of a block which is not an # if/elsif/else/do/sort/map/grep/eval # Testfiles: # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t else { $rbrace_follower = \%is_other_brace_follower; } # See if an elsif block is followed by another elsif or else; # complain if not. if ( $block_type eq 'elsif' ) { # more code on this line ? ( this is unusual ) if ( $next_nonblank_token_type ne 'b' && $next_nonblank_token_type ne '#' ) { # check for 'elsif' or 'else' if ( !$is_elsif_else{$next_nonblank_token} ) { write_logfile_entry("(No else block)\n"); # Note that we cannot add a missing else block # in this case because more code follows the # closing elsif brace on the same line. if ( $rOpts_warn_missing_else && !DEVEL_MODE ) { my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; warning("$lno: No else block\n"); } } } # no more code on this line, so check on next line else { my $K_next = $self->K_next_code($K_last); if ( !defined($K_next) || $rLL->[$K_next]->[_TYPE_] ne 'k' || !$is_elsif_else{ $rLL->[$K_next]->[_TOKEN_] } ) { $K_dangling_elsif = $Ktoken_vars; write_logfile_entry("(No else block)\n"); if ( $rOpts_warn_missing_else && !DEVEL_MODE ) { my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1; if ($rOpts_add_missing_else) { warning( "$lno: Adding missing else block\n"); } else { warning( "$lno: No else block (use -ame to add one)\n" ); } } } } } # keep going after certain block types (map,sort,grep,eval) # added eval for borris.t if ($keep_going) { # keep going $rbrace_follower = undef; } # if no more tokens, postpone decision until re-entering elsif ( ( $next_nonblank_token_type eq 'b' ) && $rOpts_add_newlines ) { if ( !$rbrace_follower ) { $self->end_batch() if (!$no_internal_newlines && $max_index_to_go >= 0 ); } } elsif ($rbrace_follower) { if ( $rbrace_follower->{$next_nonblank_token} ) { # Fix for b1385: keep break after a comma following a # 'do' block. This could also be used for other block # types, but that would cause a significant change in # existing formatting without much benefit. if ( $next_nonblank_token eq ',' && $Knnb eq $K_last && $block_type eq 'do' && $rOpts_add_newlines && $self->is_trailing_comma($Knnb) ) { $self->[_rbreak_after_Klast_]->{$K_last} = 1; } } else { $self->end_batch() if (!$no_internal_newlines && $max_index_to_go >= 0 ); } $rbrace_follower = undef; } else { $self->end_batch() if ( !$no_internal_newlines && $max_index_to_go >= 0 ); } } ## end treatment of closing block token #------------------------------ # handle here_doc target string #------------------------------ elsif ( $type eq 'h' ) { # no newlines after seeing here-target $no_internal_newlines = 2; $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); } #----------------------------- # handle all other token types #----------------------------- else { $self->store_token_to_go( $Ktoken_vars, $rtoken_vars ); # break after a label if requested if ( $rOpts_break_after_labels && $type eq 'J' && $rOpts_break_after_labels == 1 ) { $self->end_batch() unless ($no_internal_newlines); } } # remember previous nonblank, non-comment OUTPUT token $K_last_nonblank_code = $Ktoken_vars; } ## end of loop over all tokens in this line return; } ## end sub process_line_inner_loop } ## end closure process_line_of_CODE sub is_trailing_comma { my ( $self, $KK ) = @_; # Given: # $KK - index of a comma in token list # Return: # true if the comma at index $KK is a trailing comma # false if not my $rLL = $self->[_rLL_]; my $type_KK = $rLL->[$KK]->[_TYPE_]; if ( $type_KK ne ',' ) { DEVEL_MODE && Fault("Bad call: expected type ',' but received '$type_KK'\n"); return; } my $Knnb = $self->K_next_nonblank($KK); if ( defined($Knnb) ) { my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_]; my $type_Knnb = $rLL->[$Knnb]->[_TYPE_]; if ( $type_sequence && $is_closing_type{$type_Knnb} ) { return 1; } } return; } ## end sub is_trailing_comma sub tight_paren_follows { my ( $self, $K_to_go_0, $K_ic ) = @_; # Input parameters: # $K_to_go_0 = first token index K of this output batch (=K_to_go[0]) # $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go]) # Return parameter: # false if we want a break after the closing do brace # true if we do not want a break after the closing do brace # We are at the closing brace of a 'do' block. See if this brace is # followed by a closing paren, and if so, set a flag which indicates # that we do not want a line break between the '}' and ')'. # xxxxx ( ...... do { ... } ) { # ^-------looking at this brace, K_ic # Subscript notation: # _i = inner container (braces in this case) # _o = outer container (parens in this case) # _io = inner opening = '{' # _ic = inner closing = '}' # _oo = outer opening = '(' # _oc = outer closing = ')' # |--K_oo |--K_oc = outer container # xxxxx ( ...... do { ...... } ) { # |--K_io |--K_ic = inner container # In general, the safe thing to do is return a 'false' value # if the statement appears to be complex. This will have # the downstream side-effect of opening up outer containers # to help make complex code readable. But for simpler # do blocks it can be preferable to keep the code compact # by returning a 'true' value. return unless defined($K_ic); my $rLL = $self->[_rLL_]; # we should only be called at a closing block my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_]; return unless ($seqno_i); # shouldn't happen; # This only applies if the next nonblank is a ')' my $K_oc = $self->K_next_nonblank($K_ic); return unless defined($K_oc); my $token_next = $rLL->[$K_oc]->[_TOKEN_]; return unless ( $token_next eq ')' ); my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_]; my $K_io = $self->[_K_opening_container_]->{$seqno_i}; my $K_oo = $self->[_K_opening_container_]->{$seqno_o}; return unless ( defined($K_io) && defined($K_oo) ); # RULE 1: Do not break before a closing signature paren # (regardless of complexity). This is a fix for issue git#22. # Looking for something like: # sub xxx ( ... do { ... } ) { # ^----- next block_type my $K_test = $self->K_next_nonblank($K_oc); if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) { my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_]; if ($seqno_test) { if ( $self->[_ris_asub_block_]->{$seqno_test} || $self->[_ris_sub_block_]->{$seqno_test} ) { return 1; } } } # RULE 2: Break if the contents within braces appears to be 'complex'. We # base this decision on the number of tokens between braces. # xxxxx ( ... do { ... } ) { # ^^^^^^ # Although very simple, it has the advantages of (1) being insensitive to # changes in lengths of identifier names, (2) easy to understand, implement # and test. A test case for this is 't/snippets/long_line.in'. # Example: $K_ic - $K_oo = 9 [Pass Rule 2] # if ( do { $2 !~ /&/ } ) { ... } # Example: $K_ic - $K_oo = 10 [Pass Rule 2] # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... } # Example: $K_ic - $K_oo = 20 [Fail Rule 2] # test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; }); return if ( $K_ic - $K_io > 16 ); # RULE 3: break if the code between the opening '(' and the '{' is 'complex' # As with the previous rule, we decide based on the token count # xxxxx ( ... do { ... } ) { # ^^^^^^^^ # Example: $K_ic - $K_oo = 9 [Pass Rule 2] # $K_io - $K_oo = 4 [Pass Rule 3] # if ( do { $2 !~ /&/ } ) { ... } # Example: $K_ic - $K_oo = 10 [Pass rule 2] # $K_io - $K_oo = 9 [Pass rule 3] # for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... } return if ( $K_io - $K_oo > 9 ); # RULE 4: Break if we have already broken this batch of output tokens return if ( $K_oo < $K_to_go_0 ); # RULE 5: Break if input is not on one line # For example, we will set the flag for the following expression # written in one line: # This has: $K_ic - $K_oo = 10 [Pass rule 2] # $K_io - $K_oo = 8 [Pass rule 3] # $self->debug( 'Error: ' . do { local $/; <$err> } ); # but we break after the brace if it is on multiple lines on input, since # the user may prefer it on multiple lines: # [Fail rule 5] # $self->debug( # 'Error: ' . do { local $/; <$err> } # ); if ( !$rOpts_ignore_old_breakpoints ) { my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_]; my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_]; return if ( $iline_oo != $iline_oc ); } # OK to keep the paren tight return 1; } ## end sub tight_paren_follows my %is_brace_semicolon_colon; BEGIN { my @q = qw( { } ; : ); @is_brace_semicolon_colon{@q} = (1) x scalar(@q); } sub starting_one_line_block { # After seeing an opening curly brace, look for the closing brace and see # if the entire block will fit on a line. This routine is not always right # so a check is made later (at the closing brace) to make sure we really # have a one-line block. We have to do this preliminary check, though, # because otherwise we would always break at a semicolon within a one-line # block if the block contains multiple statements. # Given: # $Kj = index of opening brace # $K_last_nonblank = index of previous nonblank code token # $K_last = index of last token of input line # Calls 'create_one_line_block' if one-line block might be formed. # Also returns a flag '$too_long': # true = distance from opening keyword to OPENING brace exceeds # the maximum line length. # false (simple return) => not too long # Note that this flag is for distance from the statement start to the # OPENING brace, not the closing brace. my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_; my $rbreak_container = $self->[_rbreak_container_]; my $rshort_nested = $self->[_rshort_nested_]; my $rLL = $self->[_rLL_]; my $K_opening_container = $self->[_K_opening_container_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; # kill any current block - we can only go 1 deep create_one_line_block(); my $i_start = 0; # This routine should not have been called if there are no tokens in the # 'to_go' arrays of previously stored tokens. A previous call to # 'store_token_to_go' should have stored an opening brace. An error here # indicates that a programming change may have caused a flush operation to # clean out the previously stored tokens. if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) { Fault("program bug: store_token_to_go called incorrectly\n") if (DEVEL_MODE); return; } # Return if block should be broken my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_]; if ( $rbreak_container->{$type_sequence_j} ) { return; } my $ris_bli_container = $self->[_ris_bli_container_]; my $is_bli = $ris_bli_container->{$type_sequence_j}; my $block_type = $rblock_type_of_seqno->{$type_sequence_j}; $block_type = EMPTY_STRING unless ( defined($block_type) ); my $previous_nonblank_token = EMPTY_STRING; my $i_last_nonblank = -1; if ( defined($K_last_nonblank) ) { $i_last_nonblank = $K_last_nonblank - $K_to_go[0]; if ( $i_last_nonblank >= 0 ) { $previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; } } #--------------------------------------------------------------------- # find the starting keyword for this block (such as 'if', 'else', ...) #--------------------------------------------------------------------- if ( $max_index_to_go == 0 ##|| $block_type =~ /^[\{\}\;\:]$/ || $is_brace_semicolon_colon{$block_type} || substr( $block_type, 0, 7 ) eq 'package' ) { $i_start = $max_index_to_go; } # the previous nonblank token should start these block types elsif ( $i_last_nonblank >= 0 && ( $previous_nonblank_token eq $block_type || $self->[_ris_asub_block_]->{$type_sequence_j} || $self->[_ris_sub_block_]->{$type_sequence_j} || substr( $block_type, -2, 2 ) eq '()' ) ) { $i_start = $i_last_nonblank; # For signatures and extended syntax ... # If this brace follows a parenthesized list, we should look back to # find the keyword before the opening paren because otherwise we might # form a one line block which stays intact, and cause the parenthesized # expression to break open. That looks bad. if ( $tokens_to_go[$i_start] eq ')' ) { # Find the opening paren my $K_start = $K_to_go[$i_start]; return unless defined($K_start); my $seqno = $type_sequence_to_go[$i_start]; return unless ($seqno); my $K_opening = $K_opening_container->{$seqno}; return if ( !defined($K_opening) ); my $i_opening = $i_start + ( $K_opening - $K_start ); # give up if not on this line return if ( $i_opening < 0 ); $i_start = $i_opening; # go back one token before the opening paren if ( $i_start > 0 ) { $i_start-- } if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; } my $lev = $levels_to_go[$i_start]; if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return } } } elsif ( $previous_nonblank_token eq ')' ) { # For something like "if (xxx) {", the keyword "if" will be # just after the most recent break. This will be 0 unless # we have just killed a one-line block and are starting another. # (doif.t) # Note: cannot use inext_index_to_go[] here because that array # is still being constructed. $i_start = $index_max_forced_break + 1; if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } # Patch to avoid breaking short blocks defined with extended_syntax: # Strip off any trailing () which was added in the parser to mark # the opening keyword. For example, in the following # create( TypeFoo $e) {$bubba} # the blocktype would be marked as create() my $stripped_block_type = $block_type; if ( substr( $block_type, -2, 2 ) eq '()' ) { $stripped_block_type = substr( $block_type, 0, -2 ); } if ( $tokens_to_go[$i_start] ne $stripped_block_type ) { return; } } # patch for SWITCH/CASE to retain one-line case/when blocks elsif ( $block_type eq 'case' || $block_type eq 'when' ) { # Note: cannot use inext_index_to_go[] here because that array # is still being constructed. $i_start = $index_max_forced_break + 1; if ( $types_to_go[$i_start] eq 'b' ) { $i_start++; } if ( $tokens_to_go[$i_start] ne $block_type ) { return; } } else { #------------------------------------------- # Couldn't find start - return too_long flag #------------------------------------------- return 1; } my $pos = total_line_length( $i_start, $max_index_to_go ) - 1; my $maximum_line_length = $maximum_line_length_at_level[ $levels_to_go[$i_start] ]; # see if distance to the opening container is too great to even start if ( $pos > $maximum_line_length ) { #------------------------------ # too long to the opening token #------------------------------ return 1; } #----------------------------------------------------------------------- # OK so far: the statement is not to long just to the OPENING token. Now # see if everything to the closing token will fit on one line #----------------------------------------------------------------------- # This is part of an update to fix cases b562 .. b983 my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j}; return unless ( defined($K_closing) ); my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] - $rLL->[$Kj]->[_CUMULATIVE_LENGTH_]; my $excess = $pos + 1 + $container_length - $maximum_line_length; # Add a small tolerance for welded tokens (case b901) if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) { $excess += 2; } if ( $excess > 0 ) { # line is too long... there is no chance of forming a one line block # if the excess is more than 1 char return if ( $excess > 1 ); # ... and give up if it is not a one-line block on input. # note: for a one-line block on input, it may be possible to keep # it as a one-line block (by removing a needless semicolon ). my $K_start = $K_to_go[$i_start]; my $ldiff = $rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_]; return if ($ldiff); } #------------------------------------------------------------------ # Loop to check contents and length of the potential one-line block #------------------------------------------------------------------ foreach my $Ki ( $Kj + 1 .. $K_last ) { # old whitespace could be arbitrarily large, so don't use it if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 } else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] } # ignore some small blocks my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_]; my $nobreak = $rshort_nested->{$type_sequence_i}; # Return false result if we exceed the maximum line length, if ( $pos > $maximum_line_length ) { return; } # keep going for non-containers elsif ( !$type_sequence_i ) { } # return if we encounter another opening brace before finding the # closing brace. elsif ($rLL->[$Ki]->[_TOKEN_] eq '{' && $rLL->[$Ki]->[_TYPE_] eq '{' && $rblock_type_of_seqno->{$type_sequence_i} && !$nobreak ) { return; } # if we find our closing brace.. elsif ($rLL->[$Ki]->[_TOKEN_] eq '}' && $rLL->[$Ki]->[_TYPE_] eq '}' && $rblock_type_of_seqno->{$type_sequence_i} && !$nobreak ) { # be sure any trailing comment also fits on the line my $Ki_nonblank = $Ki; if ( $Ki_nonblank < $K_last ) { $Ki_nonblank++; if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b' && $Ki_nonblank < $K_last ) { $Ki_nonblank++; } } # Patch for one-line sort/map/grep/eval blocks with side comments: # We will ignore the side comment length for sort/map/grep/eval # because this can lead to statements which change every time # perltidy is run. Here is an example from Denis Moskowitz which # oscillates between these two states without this patch: ## -------- ## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf ## @baz; ## ## grep { ## $_->foo ne 'bar' ## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf ## @baz; ## -------- # When the first line is input it gets broken apart by the main # line break logic in sub process_line_of_CODE. # When the second line is input it gets recombined by # process_line_of_CODE and passed to the output routines. The # output routines (break_long_lines) do not break it apart # because the bond strengths are set to the highest possible value # for grep/map/eval/sort blocks, so the first version gets output. # It would be possible to fix this by changing bond strengths, # but they are high to prevent errors in older versions of perl. # See c100 for eval test. if ( $Ki < $K_last && $rLL->[$K_last]->[_TYPE_] eq '#' && $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_] && !$rOpts_ignore_side_comment_lengths && !$is_sort_map_grep_eval{$block_type} && $K_last - $Ki_nonblank <= 2 ) { # Only include the side comment for if/else/elsif/unless if it # immediately follows (because the current '$rbrace_follower' # logic for these will give an immediate brake after these # closing braces). So for example a line like this # if (...) { ... } ; # very long comment...... # will already break like this: # if (...) { ... } # ; # very long comment...... # so we do not need to include the length of the comment, which # would break the block. Project 'bioperl' has coding like this. ## !~ /^(if|else|elsif|unless)$/ if ( !$is_if_unless_elsif_else{$block_type} || $K_last == $Ki_nonblank ) { $Ki_nonblank = $K_last; $pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_]; if ( $Ki_nonblank > $Ki + 1 ) { # source whitespace could be anything, assume # at least one space before the hash on output if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) { $pos += 1; } else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] } } if ( $pos >= $maximum_line_length ) { return; } } } #-------------------------- # ok, it's a one-line block #-------------------------- create_one_line_block($i_start); return; } # just keep going for other characters else { } } #-------------------------------------------------- # End Loop to examine tokens in potential one-block #-------------------------------------------------- # We haven't hit the closing brace, but there is still space. So the # question here is, should we keep going to look at more lines in hopes of # forming a new one-line block, or should we stop right now. The problem # with continuing is that we will not be able to honor breaks before the # opening brace if we continue. # Typically we will want to keep trying to make one-line blocks for things # like sort/map/grep/eval. But it is not always a good idea to make as # many one-line blocks as possible, so other types are not done. The user # can always use -mangle. # If we want to keep going, we will create a new one-line block. # The blocks which we can keep going are in a hash, but we never want # to continue if we are at a '-bli' block. if ( $want_one_line_block{$block_type} && !$is_bli ) { my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j}; my $semicolon_count = $rtype_count && $rtype_count->{';'} ? $rtype_count->{';'} : 0; # Ignore a terminal semicolon in the count if ( $semicolon_count <= 2 ) { my $K_closing_container = $self->[_K_closing_container_]; my $K_closing_j = $K_closing_container->{$type_sequence_j}; my $Kp = $self->K_previous_nonblank($K_closing_j); if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq ';' ) { $semicolon_count -= 1; } } if ( $semicolon_count <= 0 ) { create_one_line_block($i_start); } elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) { # Mark short broken eval blocks for possible later use in # avoiding adding spaces before a 'package' line. This is not # essential but helps keep newer and older formatting the same. $self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1; } else { ## ok } } return; } ## end sub starting_one_line_block sub unstore_token_to_go { # remove most recent token from output stream my $self = shift; if ( $max_index_to_go > 0 ) { $max_index_to_go--; } else { $max_index_to_go = UNDEFINED_INDEX; } return; } ## end sub unstore_token_to_go sub compare_indentation_levels { # Check to see if output line tabbing agrees with input line # this can be very useful for debugging a script which has an extra # or missing brace. my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_; return unless ( defined($K_first) ); my $rLL = $self->[_rLL_]; # ignore a line with a leading blank token - issue c195 my $type = $rLL->[$K_first]->[_TYPE_]; return if ( $type eq 'b' ); my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first]; # record max structural depth for log file if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) { $self->[_maximum_BLOCK_level_] = $structural_indentation_level; $self->[_maximum_BLOCK_level_at_line_] = $line_number; } my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_]; my $is_closing_block = $type_sequence && $self->[_rblock_type_of_seqno_]->{$type_sequence} && $type eq '}'; if ( $guessed_indentation_level ne $structural_indentation_level ) { $self->[_last_tabbing_disagreement_] = $line_number; if ($is_closing_block) { if ( !$self->[_in_brace_tabbing_disagreement_] ) { $self->[_in_brace_tabbing_disagreement_] = $line_number; } if ( !$self->[_first_brace_tabbing_disagreement_] ) { $self->[_first_brace_tabbing_disagreement_] = $line_number; } } if ( !$self->[_in_tabbing_disagreement_] ) { $self->[_tabbing_disagreement_count_]++; if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { write_logfile_entry( "Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n" ); } $self->[_in_tabbing_disagreement_] = $line_number; $self->[_first_tabbing_disagreement_] = $line_number unless ( $self->[_first_tabbing_disagreement_] ); } } else { $self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block); my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_]; if ($in_tabbing_disagreement) { if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) { write_logfile_entry( "End indentation disagreement from input line $in_tabbing_disagreement\n" ); if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES ) { write_logfile_entry( "No further tabbing disagreements will be noted\n"); } } $self->[_in_tabbing_disagreement_] = 0; } } return; } ## end sub compare_indentation_levels ################################################### # CODE SECTION 8: Utilities for setting breakpoints ################################################### { ## begin closure set_forced_breakpoint my @forced_breakpoint_undo_stack; # These are global vars for efficiency: # my $forced_breakpoint_count; # my $forced_breakpoint_undo_count; # my $index_max_forced_break; # Break before or after certain tokens based on user settings my %break_before_or_after_token; BEGIN { # Updated to use all operators. This fixes case b1054 # Here is the previous simplified version: ## my @q = qw( . : ? and or xor && || ); my @q = @all_operators; push @q, ','; @break_before_or_after_token{@q} = (1) x scalar(@q); } ## end BEGIN sub set_fake_breakpoint { # Just bump up the breakpoint count as a signal that there are breaks. # This is useful if we have breaks but may want to postpone deciding # where to make them. $forced_breakpoint_count++; return; } ## end sub set_fake_breakpoint use constant DEBUG_FORCE => 0; sub set_forced_breakpoint { my ( $self, $i ) = @_; # Set a breakpoint AFTER the token at index $i in the _to_go arrays. # Exceptions: # - If the token at index $i is a blank, backup to $i-1 to # get to the previous nonblank token. # - For certain tokens, the break may be placed BEFORE the token # at index $i, depending on user break preference settings. # - If a break is made after an opening token, then a break will # also be made before the corresponding closing token. # Returns '$i_nonblank': # = index of the token after which the breakpoint was actually placed # = undef if breakpoint was not set. my $i_nonblank; if ( !defined($i) || $i < 0 ) { # Calls with bad index $i are harmless but waste time and should # be caught and eliminated during code development. if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); Fault( "Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n" ); } return; } # Break after token $i $i_nonblank = $self->set_forced_breakpoint_AFTER($i); # If we break at an opening container..break at the closing my $set_closing; if ( defined($i_nonblank) && $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } ) { $set_closing = 1; $self->set_closing_breakpoint($i_nonblank); } DEBUG_FORCE && do { my ( $a, $b, $c ) = caller(); my $msg = "FORCE $forced_breakpoint_count after call from $a $c with i=$i max=$max_index_to_go"; if ( !defined($i_nonblank) ) { $i = EMPTY_STRING unless defined($i); $msg .= " but could not set break after i='$i'\n"; } else { my $nobr = $nobreak_to_go[$i_nonblank]; $nobr = 0 if ( !defined($nobr) ); $msg .= <<EOM; set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr EOM if ( defined($set_closing) ) { $msg .= " Also set closing breakpoint corresponding to this token\n"; } } print {*STDOUT} $msg; }; return $i_nonblank; } ## end sub set_forced_breakpoint sub set_forced_breakpoint_AFTER { my ( $self, $i ) = @_; # This routine is only called by sub set_forced_breakpoint and # sub set_closing_breakpoint. # Set a breakpoint AFTER the token at index $i in the _to_go arrays. # Exceptions: # - If the token at index $i is a blank, backup to $i-1 to # get to the previous nonblank token. # - For certain tokens, the break may be placed BEFORE the token # at index $i, depending on user break preference settings. # Returns: # - the index of the token after which the break was set, or # - undef if no break was set return if ( !defined($i) ); return if ( $i < 0 ); # Back up at a blank so we have a token to examine. # This was added to fix for cases like b932 involving an '=' break. if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- } # Never break between welded tokens return if ( $total_weld_count && $self->[_rK_weld_right_]->{ $K_to_go[$i] } ); my $token = $tokens_to_go[$i]; my $type = $types_to_go[$i]; # For certain tokens, use user settings to decide if we break before or # after it if ( $break_before_or_after_token{$token} && ( $type eq $token || $type eq 'k' ) ) { if ( $want_break_before{$token} && $i >= 0 ) { $i-- } } # breaks are forced before 'if' and 'unless' elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- } else { ## ok } if ( $i >= 0 && $i <= $max_index_to_go ) { my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1; if ( $i_nonblank >= 0 && !$nobreak_to_go[$i_nonblank] && !$forced_breakpoint_to_go[$i_nonblank] ) { $forced_breakpoint_to_go[$i_nonblank] = 1; if ( $i_nonblank > $index_max_forced_break ) { $index_max_forced_break = $i_nonblank; } $forced_breakpoint_count++; $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] = $i_nonblank; # success return $i_nonblank; } } return; } ## end sub set_forced_breakpoint_AFTER sub clear_breakpoint_undo_stack { my ($self) = @_; $forced_breakpoint_undo_count = 0; return; } use constant DEBUG_UNDOBP => 0; sub undo_forced_breakpoint_stack { my ( $self, $i_start ) = @_; # Given $i_start, a non-negative index the 'undo stack' of breakpoints, # remove all breakpoints from the top of the 'undo stack' down to and # including index $i_start. # The 'undo stack' is a stack of all breakpoints made for a batch of # code. if ( $i_start < 0 ) { $i_start = 0; my ( $a, $b, $c ) = caller(); # Bad call, can only be due to a recent programming change. Fault( "Program Bug: undo_forced_breakpoint_stack from $a $c has bad i=$i_start " ) if (DEVEL_MODE); return; } while ( $forced_breakpoint_undo_count > $i_start ) { my $i = $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ]; if ( $i >= 0 && $i <= $max_index_to_go ) { $forced_breakpoint_to_go[$i] = 0; $forced_breakpoint_count--; DEBUG_UNDOBP && do { my ( $a, $b, $c ) = caller(); print {*STDOUT} "UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"; }; } # shouldn't happen, but not a critical error else { if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); Fault(<<EOM); Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go EOM } } } return; } ## end sub undo_forced_breakpoint_stack } ## end closure set_forced_breakpoint { ## begin closure set_closing_breakpoint my %postponed_breakpoint; sub initialize_postponed_breakpoint { %postponed_breakpoint = (); return; } sub has_postponed_breakpoint { my ($seqno) = @_; return $postponed_breakpoint{$seqno}; } sub set_closing_breakpoint { # set a breakpoint at a matching closing token my ( $self, $i_break ) = @_; if ( defined( $mate_index_to_go[$i_break] ) ) { # Don't reduce the '2' in the statement below. # Test files: attrib.t, BasicLyx.pm.html if ( $mate_index_to_go[$i_break] > $i_break + 2 ) { # break before } ] and ), but sub set_forced_breakpoint will decide # to break before or after a ? and : my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1; $self->set_forced_breakpoint_AFTER( $mate_index_to_go[$i_break] - $inc ); } } else { my $type_sequence = $type_sequence_to_go[$i_break]; if ($type_sequence) { $postponed_breakpoint{$type_sequence} = 1; } } return; } ## end sub set_closing_breakpoint } ## end closure set_closing_breakpoint ######################################### # CODE SECTION 9: Process batches of code ######################################### { ## begin closure grind_batch_of_CODE # The routines in this closure begin the processing of a 'batch' of code. # A variable to keep track of consecutive nonblank lines so that we can # insert occasional blanks my @nonblank_lines_at_depth; # A variable to remember maximum size of previous batches; this is needed # by the logical padding routine my $peak_batch_size; my $batch_count; # variables to keep track of indentation of unmatched containers. my %saved_opening_indentation; sub initialize_grind_batch_of_CODE { @nonblank_lines_at_depth = (); $peak_batch_size = 0; $batch_count = 0; %saved_opening_indentation = (); return; } ## end sub initialize_grind_batch_of_CODE # sub grind_batch_of_CODE receives sections of code which are the longest # possible lines without a break. In other words, it receives what is left # after applying all breaks forced by blank lines, block comments, side # comments, pod text, and structural braces. Its job is to break this code # down into smaller pieces, if necessary, which fit within the maximum # allowed line length. Then it sends the resulting lines of code on down # the pipeline to the VerticalAligner package, breaking the code into # continuation lines as necessary. The batch of tokens are in the "to_go" # arrays. The name 'grind' is slightly suggestive of a machine continually # breaking down long lines of code, but mainly it is unique and easy to # remember and find with an editor search. # The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work # together in the following way: # - 'process_line_of_CODE' receives the original INPUT lines one-by-one and # combines them into the largest sequences of tokens which might form a new # line. # - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT # lines. # So sub 'process_line_of_CODE' builds up the longest possible continuous # sequences of tokens, regardless of line length, and then # grind_batch_of_CODE breaks these sequences back down into the new output # lines. # Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner. use constant DEBUG_GRIND => 0; sub check_grind_input { # Check for valid input to sub grind_batch_of_CODE. An error here # would most likely be due to an error in 'sub store_token_to_go'. my ($self) = @_; # Be sure there are tokens in the batch if ( $max_index_to_go < 0 ) { Fault(<<EOM); sub grind incorrectly called with max_index_to_go=$max_index_to_go EOM } my $Klimit = $self->[_Klimit_]; # The local batch tokens must be a continuous part of the global token # array. my $KK; foreach my $ii ( 0 .. $max_index_to_go ) { my $Km = $KK; $KK = $K_to_go[$ii]; if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) { $KK = '(undef)' unless defined($KK); Fault(<<EOM); at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit) EOM } if ( $ii > 0 && $KK != $Km + 1 ) { my $im = $ii - 1; Fault(<<EOM); Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1 EOM } } return; } ## end sub check_grind_input # This filter speeds up a critical if-test my %quick_filter; BEGIN { my @q = qw# L { ( [ R ] ) } ? : f => #; push @q, ','; @quick_filter{@q} = (1) x scalar(@q); } sub grind_batch_of_CODE { my ($self) = @_; #----------------------------------------------------------------- # This sub directs the formatting of one complete batch of tokens. # The tokens of the batch are in the '_to_go' arrays. #----------------------------------------------------------------- my $this_batch = $self->[_this_batch_]; $this_batch->[_peak_batch_size_] = $peak_batch_size; $this_batch->[_batch_count_] = ++$batch_count; $self->check_grind_input() if (DEVEL_MODE); # This routine is only called from sub flush_batch_of_code, so that # routine is a better spot for debugging. DEBUG_GRIND && do { my $token = my $type = EMPTY_STRING; if ( $max_index_to_go >= 0 ) { $token = $tokens_to_go[$max_index_to_go]; $type = $types_to_go[$max_index_to_go]; } my $output_str = EMPTY_STRING; if ( $max_index_to_go > 20 ) { my $mm = $max_index_to_go - 10; $output_str = join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... " . join( EMPTY_STRING, @tokens_to_go[ $mm .. $max_index_to_go ] ); } else { $output_str = join EMPTY_STRING, @tokens_to_go[ 0 .. $max_index_to_go ]; } print {*STDOUT} <<EOM; grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text: $output_str EOM }; # Remove any trailing blank, which is possible (c192 has example) if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) { $max_index_to_go -= 1; } return if ( $max_index_to_go < 0 ); my $lp_object_count_this_batch; if ($rOpts_line_up_parentheses) { $this_batch->[_lp_object_count_this_batch_] = $lp_object_count_this_batch = $self->set_lp_indentation(); } #----------------------------------------------------------- # Shortcut for block comments. But not for block comments # with lp because they must use the lp corrector step below. #----------------------------------------------------------- if ( !$max_index_to_go && $types_to_go[0] eq '#' && !$lp_object_count_this_batch ) { my $ibeg = 0; $this_batch->[_ri_first_] = [$ibeg]; $this_batch->[_ri_last_] = [$ibeg]; $self->convey_batch_to_vertical_aligner(); my $level = $levels_to_go[$ibeg]; $self->[_last_line_leading_type_] = $types_to_go[$ibeg]; $self->[_last_line_leading_level_] = $level; $nonblank_lines_at_depth[$level] = 1; return; } #------------- # Normal route #------------- my $rLL = $self->[_rLL_]; #------------------------------------------------------- # Loop over the batch to initialize some batch variables #------------------------------------------------------- my $comma_count_in_batch = 0; my @colon_list; my @ix_seqno_controlling_ci; my %comma_arrow_count; my $comma_arrow_count_contained = 0; my @unmatched_closing_indexes_in_this_batch; my @unmatched_opening_indexes_in_this_batch; my @i_for_semicolon; foreach my $i ( 0 .. $max_index_to_go ) { if ( $types_to_go[$i] eq 'b' ) { $inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1; next; } $inext_to_go[$i] = $i + 1; # This is an optional shortcut to save a bit of time by skipping # most tokens. Note: the filter may need to be updated if the # next 'if' tests are ever changed to include more token types. next if ( !$quick_filter{ $types_to_go[$i] } ); my $type = $types_to_go[$i]; # gather info needed by sub break_long_lines if ( $type_sequence_to_go[$i] ) { my $seqno = $type_sequence_to_go[$i]; my $token = $tokens_to_go[$i]; # remember indexes of any tokens controlling xci # in this batch. This list is needed by sub undo_ci. if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) { push @ix_seqno_controlling_ci, $i; } if ( $is_opening_sequence_token{$token} ) { if ( $self->[_rbreak_container_]->{$seqno} ) { $self->set_forced_breakpoint($i); } push @unmatched_opening_indexes_in_this_batch, $i; if ( $type eq '?' ) { push @colon_list, $type; } } else { ## $is_closing_sequence_token{$token} if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) { $self->set_forced_breakpoint( $i - 1 ); } my $i_mate = pop @unmatched_opening_indexes_in_this_batch; if ( defined($i_mate) && $i_mate >= 0 ) { if ( $type_sequence_to_go[$i_mate] == $type_sequence_to_go[$i] ) { $mate_index_to_go[$i] = $i_mate; $mate_index_to_go[$i_mate] = $i; my $cac = $comma_arrow_count{$seqno}; $comma_arrow_count_contained += $cac if ($cac); } else { push @unmatched_opening_indexes_in_this_batch, $i_mate; push @unmatched_closing_indexes_in_this_batch, $i; } } else { push @unmatched_closing_indexes_in_this_batch, $i; } if ( $type eq ':' ) { push @colon_list, $type; } } } ## end if ($seqno) elsif ( $type eq ',' ) { $comma_count_in_batch++; } elsif ( $type eq '=>' ) { if (@unmatched_opening_indexes_in_this_batch) { my $j = $unmatched_opening_indexes_in_this_batch[-1]; my $seqno = $type_sequence_to_go[$j]; $comma_arrow_count{$seqno}++; } } elsif ( $type eq 'f' ) { push @i_for_semicolon, $i; } else { ## not a special type } } ## end for ( my $i = 0 ; $i <=...) # Break at a single interior C-style for semicolon in this batch (c154) if ( @i_for_semicolon && @i_for_semicolon == 1 ) { my $i = $i_for_semicolon[0]; my $inext = $inext_to_go[$i]; if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) { $self->set_forced_breakpoint($i); } } my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch + @unmatched_closing_indexes_in_this_batch; if (@unmatched_opening_indexes_in_this_batch) { $this_batch->[_runmatched_opening_indexes_] = \@unmatched_opening_indexes_in_this_batch; } if (@ix_seqno_controlling_ci) { $this_batch->[_rix_seqno_controlling_ci_] = \@ix_seqno_controlling_ci; } #------------------------ # Set special breakpoints #------------------------ # If this line ends in a code block brace, set breaks at any # previous closing code block braces to breakup a chain of code # blocks on one line. This is very rare but can happen for # user-defined subs. For example we might be looking at this: # BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR { my $saw_good_break; # flag to force breaks even if short line if ( # looking for opening or closing block brace $block_type_to_go[$max_index_to_go] # never any good breaks if just one token && $max_index_to_go > 0 # but not one of these which are never duplicated on a line: # until|while|for|if|elsif|else && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] } ) { my $lev = $nesting_depth_to_go[$max_index_to_go]; # Walk backwards from the end and # set break at any closing block braces at the same level. # But quit if we are not in a chain of blocks. foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) { last if ( $levels_to_go[$i] < $lev ); # stop at a lower level next if ( $levels_to_go[$i] > $lev ); # skip past higher level if ( $block_type_to_go[$i] ) { if ( $tokens_to_go[$i] eq '}' ) { $self->set_forced_breakpoint($i); $saw_good_break = 1; } } # quit if we see anything besides words, function, blanks # at this level elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last } else { ## keep going } } } #----------------------------------------------- # insertion of any blank lines before this batch #----------------------------------------------- my $imin = 0; my $imax = $max_index_to_go; # trim any blank tokens - for safety, but should not be necessary if ( $types_to_go[$imin] eq 'b' ) { $imin++ } if ( $types_to_go[$imax] eq 'b' ) { $imax-- } if ( $imin > $imax ) { if (DEVEL_MODE) { my $K0 = $K_to_go[0]; my $lno = EMPTY_STRING; if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 } Fault(<<EOM); Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax EOM } return; } my $last_line_leading_type = $self->[_last_line_leading_type_]; my $last_line_leading_level = $self->[_last_line_leading_level_]; my $leading_type = $types_to_go[0]; my $leading_level = $levels_to_go[0]; # add blank line(s) before certain key types but not after a comment if ( $last_line_leading_type ne '#' ) { my $blank_count = 0; my $leading_token = $tokens_to_go[0]; # break before certain key blocks except one-liners if ( $leading_type eq 'k' ) { if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) { $blank_count = $rOpts->{'blank-lines-before-subs'} if ( terminal_type_i( 0, $max_index_to_go ) ne '}' ); } # Break before certain block types if we haven't had a # break at this level for a while. This is the # difficult decision.. elsif ($last_line_leading_type ne 'b' && $is_if_unless_while_until_for_foreach{$leading_token} ) { my $lc = $nonblank_lines_at_depth[$last_line_leading_level]; if ( !defined($lc) ) { $lc = 0 } # patch for RT #128216: no blank line inserted at a level # change if ( $levels_to_go[0] != $last_line_leading_level ) { $lc = 0; } if ( $rOpts->{'blanks-before-blocks'} && $lc >= $rOpts->{'long-block-line-count'} && $self->consecutive_nonblank_lines() >= $rOpts->{'long-block-line-count'} && terminal_type_i( 0, $max_index_to_go ) ne '}' ) { $blank_count = 1; } } else { ## no blank } } # blank lines before subs except declarations and one-liners # Fix for c250: added new type 'P', changed 'i' to 'S' elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) { my $special_identifier = $self->[_ris_special_identifier_token_]->{$leading_token}; if ($special_identifier) { ## $leading_token =~ /$SUB_PATTERN/ if ( $special_identifier eq 'sub' ) { $blank_count = $rOpts->{'blank-lines-before-subs'} if ( terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}\,]$/ ); } # break before all package declarations ## substr( $leading_token, 0, 8 ) eq 'package ' elsif ( $special_identifier eq 'package' ) { # ... except in a very short eval block my $pseqno = $parent_seqno_to_go[0]; $blank_count = $rOpts->{'blank-lines-before-packages'} if ( !$self->[_ris_short_broken_eval_block_]->{$pseqno} ); } else { DEVEL_MODE && Fault(<<EOM); Found special identifier '$special_identifier', but expecting 'sub' or 'package' EOM } } } # Check for blank lines wanted before a closing brace elsif ( $leading_token eq '}' ) { if ( $rOpts->{'blank-lines-before-closing-block'} && $block_type_to_go[0] && $block_type_to_go[0] =~ /$blank_lines_before_closing_block_pattern/ ) { my $nblanks = $rOpts->{'blank-lines-before-closing-block'}; if ( $nblanks > $blank_count ) { $blank_count = $nblanks; } } } else { ## ok } if ($blank_count) { # future: send blank line down normal path to VerticalAligner? $self->flush_vertical_aligner(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->require_blank_code_lines($blank_count); } } # update blank line variables and count number of consecutive # non-blank, non-comment lines at this level if ( $leading_level == $last_line_leading_level && $leading_type ne '#' && defined( $nonblank_lines_at_depth[$leading_level] ) ) { $nonblank_lines_at_depth[$leading_level]++; } else { $nonblank_lines_at_depth[$leading_level] = 1; } $self->[_last_line_leading_type_] = $leading_type; $self->[_last_line_leading_level_] = $leading_level; #-------------------------- # scan lists and long lines #-------------------------- # Flag to remember if we called sub 'pad_array_to_go'. # Some routines (break_lists(), break_long_lines() ) need some # extra tokens added at the end of the batch. Most batches do not # use these routines, so we will avoid calling 'pad_array_to_go' # unless it is needed. my $called_pad_array_to_go; # set all forced breakpoints for good list formatting my $is_long_line; my $multiple_old_lines_in_batch; if ( $max_index_to_go > 0 ) { $is_long_line = $self->excess_line_length( $imin, $max_index_to_go ) > 0; my $Kbeg = $K_to_go[0]; my $Kend = $K_to_go[$max_index_to_go]; $multiple_old_lines_in_batch = $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_]; } # Optional optimization: avoid calling break_lists for a single block # brace. This is done by turning off the flag $is_unbalanced_batch. elsif ($is_unbalanced_batch) { my $block_type = $block_type_to_go[0]; if ( $block_type && !$lp_object_count_this_batch && $is_block_without_semicolon{$block_type} ) { # opening blocks can skip break_lists call if no commas in # container. if ( $leading_type eq '{' ) { my $seqno = $type_sequence_to_go[0]; my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno}; if ($rtype_count) { my $comma_count = $rtype_count->{','}; if ( !$comma_count ) { $is_unbalanced_batch = 0; } } } # closing block braces can be skipped else { $is_unbalanced_batch = 0; } } } else { ## ok - single token } my $rbond_strength_bias = []; if ( $is_long_line || $multiple_old_lines_in_batch # must always call break_lists() with unbalanced batches because # it is maintaining some stacks || $is_unbalanced_batch # call break_lists if we might want to break at commas || ( $comma_count_in_batch && ( $rOpts_maximum_fields_per_table > 0 && $rOpts_maximum_fields_per_table <= $comma_count_in_batch || $rOpts_comma_arrow_breakpoints == 0 ) ) # call break_lists if user may want to break open some one-line # hash references || ( $comma_arrow_count_contained && $rOpts_comma_arrow_breakpoints != 3 ) ) { # add a couple of extra terminal blank tokens $self->pad_array_to_go(); $called_pad_array_to_go = 1; my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias ); $saw_good_break ||= $sgb; } # let $ri_first and $ri_last be references to lists of # first and last tokens of line fragments to output.. my ( $ri_first, $ri_last ); #----------------------------- # a single token uses one line #----------------------------- if ( !$max_index_to_go ) { $ri_first = [$imin]; $ri_last = [$imax]; } # for multiple tokens else { #------------------------- # write a single line if.. #------------------------- if ( ( # this line is 'short' !$is_long_line # and we didn't see a good breakpoint && !$saw_good_break # and we don't already have an interior breakpoint && !$forced_breakpoint_count ) # or, we aren't allowed to add any newlines || !$rOpts_add_newlines ) { $ri_first = [$imin]; $ri_last = [$imax]; } #----------------------------- # otherwise use multiple lines #----------------------------- else { # add a couple of extra terminal blank tokens if we haven't # already done so $self->pad_array_to_go() unless ($called_pad_array_to_go); ( $ri_first, $ri_last, my $rbond_strength_to_go ) = $self->break_long_lines( $saw_good_break, \@colon_list, $rbond_strength_bias ); $self->break_all_chain_tokens( $ri_first, $ri_last ); $self->break_equals( $ri_first, $ri_last ) if @{$ri_first} >= 3; # now we do a correction step to clean this up a bit # (The only time we would not do this is for debugging) $self->recombine_breakpoints( $ri_first, $ri_last, $rbond_strength_to_go ) if ( $rOpts_recombine && @{$ri_first} > 1 ); $self->insert_final_ternary_breaks( $ri_first, $ri_last ) if (@colon_list); } $self->insert_breaks_before_list_opening_containers( $ri_first, $ri_last ) if ( %break_before_container_types && $max_index_to_go > 0 ); # Check for a phantom semicolon at the end of the batch if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) { $self->unmask_phantom_token($imax); } if ( $rOpts_one_line_block_semicolons == 0 ) { $self->delete_one_line_semicolons( $ri_first, $ri_last ); } # Remember the largest batch size processed. This is needed by the # logical padding routine to avoid padding the first nonblank token if ( $max_index_to_go > $peak_batch_size ) { $peak_batch_size = $max_index_to_go; } } #------------------- # -lp corrector step #------------------- if ($lp_object_count_this_batch) { $self->correct_lp_indentation( $ri_first, $ri_last ); } #-------------------- # ship this batch out #-------------------- $this_batch->[_ri_first_] = $ri_first; $this_batch->[_ri_last_] = $ri_last; $self->convey_batch_to_vertical_aligner(); #------------------------------------------------------------------- # Write requested number of blank lines after an opening block brace #------------------------------------------------------------------- if ($rOpts_blank_lines_after_opening_block) { my $iterm = $imax; if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) { $iterm -= 1; if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) { $iterm -= 1; } } if ( $types_to_go[$iterm] eq '{' && $block_type_to_go[$iterm] && $block_type_to_go[$iterm] =~ /$blank_lines_after_opening_block_pattern/ ) { my $nblanks = $rOpts_blank_lines_after_opening_block; $self->flush_vertical_aligner(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->require_blank_code_lines($nblanks); } } return; } ## end sub grind_batch_of_CODE sub iprev_to_go { my ($i) = @_; # Given index $i of a token in the '_to_go' arrays, return # the index of the previous nonblank token. return $i - 1 > 0 && $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1; } sub unmask_phantom_token { my ( $self, $iend ) = @_; # Turn a phantom token into a real token. # Input parameter: # $iend = the index in the output batch array of this token. # Phantom tokens are specially marked token types (such as ';') with # no token text which only become real tokens if they occur at the end # of an output line. At one time phantom ',' tokens were handled # here, but now they are processed elsewhere. my $rLL = $self->[_rLL_]; my $KK = $K_to_go[$iend]; my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_]; my $type = $types_to_go[$iend]; return unless ( $type eq ';' ); my $tok = $type; my $tok_len = length($tok); if ( $want_left_space{$type} != WS_NO ) { $tok = SPACE . $tok; $tok_len += 1; } $tokens_to_go[$iend] = $tok; $token_lengths_to_go[$iend] = $tok_len; $rLL->[$KK]->[_TOKEN_] = $tok; $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; $self->note_added_semicolon($line_number); # This changes the summed lengths of the rest of this batch foreach ( $iend .. $max_index_to_go ) { $summed_lengths_to_go[ $_ + 1 ] += $tok_len; } return; } ## end sub unmask_phantom_token sub save_opening_indentation { # This should be called after each batch of tokens is output. It # saves indentations of lines of all unmatched opening tokens. # These will be used by sub get_opening_indentation. my ( $self, $ri_first, $ri_last, $rindentation_list, $runmatched_opening_indexes ) = @_; $runmatched_opening_indexes = [] if ( !defined($runmatched_opening_indexes) ); # QW INDENTATION PATCH 1: # Also save indentation for multiline qw quotes my @i_qw; my $seqno_qw_opening; if ( $types_to_go[$max_index_to_go] eq 'q' ) { my $KK = $K_to_go[$max_index_to_go]; $seqno_qw_opening = $self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK}; if ($seqno_qw_opening) { push @i_qw, $max_index_to_go; } } # we need to save indentations of any unmatched opening tokens # in this batch because we may need them in a subsequent batch. foreach ( @{$runmatched_opening_indexes}, @i_qw ) { my $seqno = $type_sequence_to_go[$_]; if ( !$seqno ) { if ( $seqno_qw_opening && $_ == $max_index_to_go ) { $seqno = $seqno_qw_opening; } else { # shouldn't happen $seqno = 'UNKNOWN'; DEVEL_MODE && Fault("unable to find sequence number\n"); } } $saved_opening_indentation{$seqno} = [ lookup_opening_indentation( $_, $ri_first, $ri_last, $rindentation_list ) ]; } return; } ## end sub save_opening_indentation sub get_saved_opening_indentation { my ($seqno) = @_; my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 ); if ($seqno) { if ( $saved_opening_indentation{$seqno} ) { ( $indent, $offset, $is_leading ) = @{ $saved_opening_indentation{$seqno} }; $exists = 1; } } # some kind of serious error it doesn't exist # (example is badfile.t) return ( $indent, $offset, $is_leading, $exists ); } ## end sub get_saved_opening_indentation } ## end closure grind_batch_of_CODE sub lookup_opening_indentation { # get the indentation of the line in the current output batch # which output a selected opening token # # given: # $i_opening - index of an opening token in the current output batch # whose line indentation we need # $ri_first - reference to list of the first index $i for each output # line in this batch # $ri_last - reference to list of the last index $i for each output line # in this batch # $rindentation_list - reference to a list containing the indentation # used for each line. (NOTE: the first slot in # this list is the last returned line number, and this is # followed by the list of indentations). # # return # -the indentation of the line which contained token $i_opening # -and its offset (number of columns) from the start of the line my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_; if ( !@{$ri_last} ) { # An error here implies a bug introduced by a recent program change. # Every batch of code has lines, so this should never happen. if (DEVEL_MODE) { Fault("Error in opening_indentation: no lines"); } return ( 0, 0, 0 ); } my $nline = $rindentation_list->[0]; # line number of previous lookup # reset line location if necessary $nline = 0 if ( $i_opening < $ri_start->[$nline] ); # find the correct line if ( $i_opening <= $ri_last->[-1] ) { while ( $i_opening > $ri_last->[$nline] ) { $nline++; } } # Error - token index is out of bounds - shouldn't happen # A program bug has been introduced in one of the calling routines. # We better stop here. else { my $i_last_line = $ri_last->[-1]; if (DEVEL_MODE) { Fault(<<EOM); Program bug in call to lookup_opening_indentation - index out of range called with index i_opening=$i_opening > $i_last_line = max index of last line This batch has max index = $max_index_to_go, EOM } $nline = $#{$ri_last}; } $rindentation_list->[0] = $nline; # save line number to start looking next call my $ibeg = $ri_start->[$nline]; my $offset = token_sequence_length( $ibeg, $i_opening ) - 1; my $is_leading = ( $ibeg == $i_opening ); return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading ); } ## end sub lookup_opening_indentation sub terminal_type_i { # returns type of last token on this line (terminal token), as follows: # returns # for a full-line comment # returns ' ' for a blank line # otherwise returns final token type my ( $ibeg, $iend ) = @_; # Start at the end and work backwards my $i = $iend; my $type_i = $types_to_go[$i]; # Check for side comment if ( $type_i eq '#' ) { $i--; if ( $i < $ibeg ) { return wantarray ? ( $type_i, $ibeg ) : $type_i; } $type_i = $types_to_go[$i]; } # Skip past a blank if ( $type_i eq 'b' ) { $i--; if ( $i < $ibeg ) { return wantarray ? ( $type_i, $ibeg ) : $type_i; } $type_i = $types_to_go[$i]; } # Found it..make sure it is a BLOCK termination, # but hide a terminal } after sort/map/grep/eval/do because it is not # necessarily the end of the line. (terminal.t) my $block_type = $block_type_to_go[$i]; if ( $type_i eq '}' && ( !$block_type || $is_sort_map_grep_eval_do{$block_type} ) ) { $type_i = 'b'; } return wantarray ? ( $type_i, $i ) : $type_i; } ## end sub terminal_type_i sub pad_array_to_go { # To simplify coding in break_lists and set_bond_strengths, it helps to # create some extra blank tokens at the end of the arrays. We also add # some undef's to help guard against using invalid data. my ($self) = @_; $K_to_go[ $max_index_to_go + 1 ] = undef; $tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING; $tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING; $tokens_to_go[ $max_index_to_go + 3 ] = undef; $types_to_go[ $max_index_to_go + 1 ] = 'b'; $types_to_go[ $max_index_to_go + 2 ] = 'b'; $types_to_go[ $max_index_to_go + 3 ] = undef; $nesting_depth_to_go[ $max_index_to_go + 2 ] = undef; $nesting_depth_to_go[ $max_index_to_go + 1 ] = $nesting_depth_to_go[$max_index_to_go]; # /^[R\}\)\]]$/ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { # Nesting depths are set to be >=0 in sub write_line, so it should # not be possible to get here unless the code has a bracing error # which leaves a closing brace with zero nesting depth. if ( !get_saw_brace_error() ) { if (DEVEL_MODE) { Fault(<<EOM); Program bug in pad_array_to_go: hit nesting error which should have been caught EOM } } } else { $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1; } } # /^[L\{\(\[]$/ elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) { $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1; } else { ## must be ? or : } return; } ## end sub pad_array_to_go sub break_all_chain_tokens { # scan the current breakpoints looking for breaks at certain "chain # operators" (. : && || + etc) which often occur repeatedly in a long # statement. If we see a break at any one, break at all similar tokens # within the same container. # my ( $self, $ri_left, $ri_right ) = @_; my %saw_chain_type; my %left_chain_type; my %right_chain_type; my %interior_chain_type; my $nmax = @{$ri_right} - 1; # scan the left and right end tokens of all lines my $count = 0; for my $n ( 0 .. $nmax ) { my $il = $ri_left->[$n]; my $ir = $ri_right->[$n]; my $typel = $types_to_go[$il]; my $typer = $types_to_go[$ir]; $typel = '+' if ( $typel eq '-' ); # treat + and - the same $typer = '+' if ( $typer eq '-' ); $typel = '*' if ( $typel eq '/' ); # treat * and / the same $typer = '*' if ( $typer eq '/' ); my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel; my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer; if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) { next if ( $typel eq '?' ); push @{ $left_chain_type{$keyl} }, $il; $saw_chain_type{$keyl} = 1; $count++; } if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) { next if ( $typer eq '?' ); push @{ $right_chain_type{$keyr} }, $ir; $saw_chain_type{$keyr} = 1; $count++; } } return unless $count; # now look for any interior tokens of the same types $count = 0; my $has_interior_dot_or_plus; for my $n ( 0 .. $nmax ) { my $il = $ri_left->[$n]; my $ir = $ri_right->[$n]; foreach my $i ( $il + 1 .. $ir - 1 ) { my $type = $types_to_go[$i]; my $key = $type eq 'k' ? $tokens_to_go[$i] : $type; $key = '+' if ( $key eq '-' ); $key = '*' if ( $key eq '/' ); if ( $saw_chain_type{$key} ) { push @{ $interior_chain_type{$key} }, $i; $count++; $has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' ); } } } return unless $count; my @keys = keys %saw_chain_type; # quit if just ONE continuation line with leading . For example-- # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{' # . $contents; # Fixed for b1399. if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) { return; } # now make a list of all new break points my @insert_list; # loop over all chain types foreach my $key (@keys) { # loop over all interior chain tokens foreach my $itest ( @{ $interior_chain_type{$key} } ) { # loop over all left end tokens of same type if ( $left_chain_type{$key} ) { next if $nobreak_to_go[ $itest - 1 ]; foreach my $i ( @{ $left_chain_type{$key} } ) { next unless $self->in_same_container_i( $i, $itest ); push @insert_list, $itest - 1; # Break at matching ? if this : is at a different level. # For example, the ? before $THRf_DEAD in the following # should get a break if its : gets a break. # # my $flags = # ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE # : ( $_ & 4 ) ? $THRf_R_DETACHED # : $THRf_R_JOINABLE; if ( $key eq ':' && $levels_to_go[$i] != $levels_to_go[$itest] ) { my $i_question = $mate_index_to_go[$itest]; if ( defined($i_question) && $i_question > 0 ) { push @insert_list, $i_question - 1; } } last; } } # loop over all right end tokens of same type if ( $right_chain_type{$key} ) { next if $nobreak_to_go[$itest]; foreach my $i ( @{ $right_chain_type{$key} } ) { next unless $self->in_same_container_i( $i, $itest ); push @insert_list, $itest; # break at matching ? if this : is at a different level if ( $key eq ':' && $levels_to_go[$i] != $levels_to_go[$itest] ) { my $i_question = $mate_index_to_go[$itest]; if ( defined($i_question) ) { push @insert_list, $i_question; } } last; } } } } # insert any new break points if (@insert_list) { $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } return; } ## end sub break_all_chain_tokens sub insert_additional_breaks { # this routine will add line breaks at requested locations after # sub break_long_lines has made preliminary breaks. my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_; my $i_f; my $i_l; my $line_number = 0; foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) { next if ( $nobreak_to_go[$i_break_left] ); $i_f = $ri_first->[$line_number]; $i_l = $ri_last->[$line_number]; while ( $i_break_left >= $i_l ) { $line_number++; # shouldn't happen unless caller passes bad indexes if ( $line_number >= @{$ri_last} ) { if (DEVEL_MODE) { Fault(<<EOM); Non-fatal program bug: couldn't set break at $i_break_left EOM } return; } $i_f = $ri_first->[$line_number]; $i_l = $ri_last->[$line_number]; } # Do not leave a blank at the end of a line; back up if necessary if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- } my $i_break_right = $inext_to_go[$i_break_left]; if ( $i_break_left >= $i_f && $i_break_left < $i_l && $i_break_right > $i_f && $i_break_right <= $i_l ) { splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) ); splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) ); } } return; } ## end sub insert_additional_breaks { ## begin closure in_same_container_i my $ris_break_token; my $ris_comma_token; BEGIN { # all cases break on seeing commas at same level my @q = qw( => ); push @q, ','; @{$ris_comma_token}{@q} = (1) x scalar(@q); # Non-ternary text also breaks on seeing any of qw(? : || or ) # Example: we would not want to break at any of these .'s # : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>" push @q, qw( or || ? : ); @{$ris_break_token}{@q} = (1) x scalar(@q); } ## end BEGIN sub in_same_container_i { # Check to see if tokens at i1 and i2 are in the same container, and # not separated by certain characters: => , ? : || or # This is an interface between the _to_go arrays to the rLL array my ( $self, $i1, $i2 ) = @_; # quick check my $parent_seqno_1 = $parent_seqno_to_go[$i1]; return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 ); if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) } my $K1 = $K_to_go[$i1]; my $K2 = $K_to_go[$i2]; my $rLL = $self->[_rLL_]; my $depth_1 = $nesting_depth_to_go[$i1]; return if ( $depth_1 < 0 ); # Shouldn't happen since i1 and i2 have same parent: return unless ( $nesting_depth_to_go[$i2] == $depth_1 ); # Select character set to scan for my $type_1 = $types_to_go[$i1]; my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token; # Fast preliminary loop to verify that tokens are in the same container my $KK = $K1; while (1) { $KK = $rLL->[$KK]->[_KNEXT_SEQ_ITEM_]; last if !defined($KK); last if ( $KK >= $K2 ); my $ii = $i1 + $KK - $K1; my $depth_i = $nesting_depth_to_go[$ii]; return if ( $depth_i < $depth_1 ); next if ( $depth_i > $depth_1 ); if ( $type_1 ne ':' ) { my $tok_i = $tokens_to_go[$ii]; return if ( $tok_i eq '?' || $tok_i eq ':' ); } } # Slow loop checking for certain characters #----------------------------------------------------- # This is potentially a slow routine and not critical. # For safety just give up for large differences. # See test file 'infinite_loop.txt' #----------------------------------------------------- return if ( $i2 - $i1 > 200 ); foreach my $ii ( $i1 + 1 .. $i2 - 1 ) { my $depth_i = $nesting_depth_to_go[$ii]; next if ( $depth_i > $depth_1 ); return if ( $depth_i < $depth_1 ); my $tok_i = $tokens_to_go[$ii]; return if ( $rbreak->{$tok_i} ); } return 1; } ## end sub in_same_container_i } ## end closure in_same_container_i sub break_equals { # Look for assignment operators that could use a breakpoint. # For example, in the following snippet # # $HOME = $ENV{HOME} # || $ENV{LOGDIR} # || $pw[7] # || die "no home directory for user $<"; # # we could break at the = to get this, which is a little nicer: # $HOME = # $ENV{HOME} # || $ENV{LOGDIR} # || $pw[7] # || die "no home directory for user $<"; # # The logic here follows the logic in set_logical_padding, which # will add the padding in the second line to improve alignment. # my ( $self, $ri_left, $ri_right ) = @_; my $nmax = @{$ri_right} - 1; return if ( $nmax < 2 ); # scan the left ends of first two lines my $tokbeg = EMPTY_STRING; my $depth_beg; for my $n ( 1 .. 2 ) { my $il = $ri_left->[$n]; my $typel = $types_to_go[$il]; my $tokenl = $tokens_to_go[$il]; my $keyl = $typel eq 'k' ? $tokenl : $typel; my $has_leading_op = $is_chain_operator{$keyl}; return unless ($has_leading_op); if ( $n > 1 ) { return unless ( $tokenl eq $tokbeg && $nesting_depth_to_go[$il] eq $depth_beg ); } $tokbeg = $tokenl; $depth_beg = $nesting_depth_to_go[$il]; } # now look for any interior tokens of the same types my $il = $ri_left->[0]; my $ir = $ri_right->[0]; # now make a list of all new break points my @insert_list; foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) { my $type = $types_to_go[$i]; if ( $is_assignment{$type} && $nesting_depth_to_go[$i] eq $depth_beg ) { if ( $want_break_before{$type} ) { push @insert_list, $i - 1; } else { push @insert_list, $i; } } } # Break after a 'return' followed by a chain of operators # return ( $^O !~ /win32|dos/i ) # && ( $^O ne 'VMS' ) # && ( $^O ne 'OS2' ) # && ( $^O ne 'MacOS' ); # To give: # return # ( $^O !~ /win32|dos/i ) # && ( $^O ne 'VMS' ) # && ( $^O ne 'OS2' ) # && ( $^O ne 'MacOS' ); my $i = 0; if ( $types_to_go[$i] eq 'k' && $tokens_to_go[$i] eq 'return' && $ir > $il && $nesting_depth_to_go[$i] eq $depth_beg ) { push @insert_list, $i; } return unless (@insert_list); # One final check... # scan second and third lines and be sure there are no assignments # we want to avoid breaking at an = to make something like this: # unless ( $icon = # $html_icons{"$type-$state"} # or $icon = $html_icons{$type} # or $icon = $html_icons{$state} ) for my $n ( 1 .. 2 ) { my $il_n = $ri_left->[$n]; my $ir_n = $ri_right->[$n]; foreach my $i ( $il_n + 1 .. $ir_n ) { my $type = $types_to_go[$i]; return if ( $is_assignment{$type} && $nesting_depth_to_go[$i] eq $depth_beg ); } } # ok, insert any new break point if (@insert_list) { $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } return; } ## end sub break_equals { ## begin closure recombine_breakpoints # This routine is called once per batch to see if it would be better # to combine some of the lines into which the batch has been broken. my %is_amp_amp; my %is_math_op; my %is_plus_minus; my %is_mult_div; BEGIN { my @q; @q = qw( && || ); @is_amp_amp{@q} = (1) x scalar(@q); @q = qw( + - * / ); @is_math_op{@q} = (1) x scalar(@q); @q = qw( + - ); @is_plus_minus{@q} = (1) x scalar(@q); @q = qw( * / ); @is_mult_div{@q} = (1) x scalar(@q); } ## end BEGIN sub Debug_dump_breakpoints { # Debug routine to dump current breakpoints...not normally called # We are given indexes to the current lines: # $ri_beg = ref to array of BEGinning indexes of each line # $ri_end = ref to array of ENDing indexes of each line my ( $self, $ri_beg, $ri_end, $msg ) = @_; print {*STDOUT} "----Dumping breakpoints from: $msg----\n"; for my $n ( 0 .. @{$ri_end} - 1 ) { my $ibeg = $ri_beg->[$n]; my $iend = $ri_end->[$n]; my $text = EMPTY_STRING; foreach my $i ( $ibeg .. $iend ) { $text .= $tokens_to_go[$i]; } print {*STDOUT} "$n ($ibeg:$iend) $text\n"; } print {*STDOUT} "----\n"; return; } ## end sub Debug_dump_breakpoints sub delete_one_line_semicolons { my ( $self, $ri_beg, $ri_end ) = @_; my $rLL = $self->[_rLL_]; my $K_opening_container = $self->[_K_opening_container_]; # Walk down the lines of this batch and delete any semicolons # terminating one-line blocks; my $nmax = @{$ri_end} - 1; foreach my $n ( 0 .. $nmax ) { my $i_beg = $ri_beg->[$n]; my $i_e = $ri_end->[$n]; my $K_beg = $K_to_go[$i_beg]; my $K_e = $K_to_go[$i_e]; my $K_end = $K_e; my $type_end = $rLL->[$K_end]->[_TYPE_]; if ( $type_end eq '#' ) { $K_end = $self->K_previous_nonblank($K_end); if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; } } # we are looking for a line ending in closing brace next unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' ); # ...and preceded by a semicolon on the same line my $K_semicolon = $self->K_previous_nonblank($K_end); next unless defined($K_semicolon); my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg ); next if ( $i_semicolon <= $i_beg ); next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' ); # Safety check - shouldn't happen - not critical # This is not worth throwing a Fault, except in DEVEL_MODE if ( $types_to_go[$i_semicolon] ne ';' ) { DEVEL_MODE && Fault("unexpected type looking for semicolon"); next; } # ... with the corresponding opening brace on the same line my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_]; my $K_opening = $K_opening_container->{$type_sequence}; next unless ( defined($K_opening) ); my $i_opening = $i_beg + ( $K_opening - $K_beg ); next if ( $i_opening < $i_beg ); # ... and only one semicolon between these braces my $semicolon_count = 0; foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) { if ( $rLL->[$K]->[_TYPE_] eq ';' ) { $semicolon_count++; last; } } next if ($semicolon_count); # ...ok, then make the semicolon invisible my $len = $token_lengths_to_go[$i_semicolon]; $tokens_to_go[$i_semicolon] = EMPTY_STRING; $token_lengths_to_go[$i_semicolon] = 0; $rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING; $rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0; foreach ( $i_semicolon .. $max_index_to_go ) { $summed_lengths_to_go[ $_ + 1 ] -= $len; } } return; } ## end sub delete_one_line_semicolons use constant DEBUG_RECOMBINE => 0; sub recombine_breakpoints { my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_; # This sub implements the 'recombine' operation on a batch. # Its task is to combine some of these lines back together to # improve formatting. The need for this arises because # sub 'break_long_lines' is very liberal in setting line breaks # for long lines, always setting breaks at good breakpoints, even # when that creates small lines. Sometimes small line fragments # are produced which would look better if they were combined. # Input parameters: # $ri_beg = ref to array of BEGinning indexes of each line # $ri_end = ref to array of ENDing indexes of each line # $rbond_strength_to_go = array of bond strengths pulling # tokens together, used to decide where best to recombine lines. #------------------------------------------------------------------- # Do nothing under extreme stress; use <= 2 for c171. # (NOTE: New optimizations make this unnecessary. But removing this # check is not really useful because this condition only occurs in # test runs, and another formatting pass will fix things anyway.) # This routine has a long history of improvements. Some past # relevant issues are : c118, c167, c171, c186, c187, c193, c200. #------------------------------------------------------------------- return if ( $high_stress_level <= 2 ); my $nmax_start = @{$ri_end} - 1; return if ( $nmax_start <= 0 ); my $iend_max = $ri_end->[$nmax_start]; if ( $types_to_go[$iend_max] eq '#' ) { $iend_max = iprev_to_go($iend_max); } my $has_terminal_semicolon = $iend_max >= 0 && $types_to_go[$iend_max] eq ';'; #-------------------------------------------------------------------- # Break into the smallest possible sub-sections to improve efficiency #-------------------------------------------------------------------- # Also make a list of all good joining tokens between the lines # n-1 and n. my @joint; my $rsections = []; my $nbeg_sec = 0; my $nend_sec; my $nmax_section = 0; foreach my $nn ( 1 .. $nmax_start ) { my $ibeg_1 = $ri_beg->[ $nn - 1 ]; my $iend_1 = $ri_end->[ $nn - 1 ]; my $iend_2 = $ri_end->[$nn]; my $ibeg_2 = $ri_beg->[$nn]; # Define certain good joint tokens my ( $itok, $itokp, $itokm ); foreach my $itest ( $iend_1, $ibeg_2 ) { my $type = $types_to_go[$itest]; if ( $is_math_op{$type} || $is_amp_amp{$type} || $is_assignment{$type} || $type eq ':' ) { $itok = $itest; } } # joint[$nn] = index of joint character $joint[$nn] = $itok; # Update the section list my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 ); if ( $excess <= 1 # The number 5 here is an arbitrary small number intended # to keep most small matches in one sub-section. || ( defined($nend_sec) && ( $nn < 5 || $nmax_start - $nn < 5 ) ) ) { $nend_sec = $nn; } else { if ( defined($nend_sec) ) { push @{$rsections}, [ $nbeg_sec, $nend_sec ]; my $num = $nend_sec - $nbeg_sec; if ( $num > $nmax_section ) { $nmax_section = $num } $nbeg_sec = $nn; $nend_sec = undef; } $nbeg_sec = $nn; } } if ( defined($nend_sec) ) { push @{$rsections}, [ $nbeg_sec, $nend_sec ]; my $num = $nend_sec - $nbeg_sec; if ( $num > $nmax_section ) { $nmax_section = $num } } my $num_sections = @{$rsections}; if ( DEBUG_RECOMBINE > 1 ) { print {*STDOUT} <<EOM; sections=$num_sections; nmax_sec=$nmax_section EOM } if ( DEBUG_RECOMBINE > 0 ) { my $max = 0; print {*STDOUT} "-----\n$num_sections sections found for nmax=$nmax_start\n"; foreach my $sect ( @{$rsections} ) { my ( $nbeg, $nend ) = @{$sect}; my $num = $nend - $nbeg; if ( $num > $max ) { $max = $num } print {*STDOUT} "$nbeg $nend\n"; } print {*STDOUT} "max size=$max of $nmax_start lines\n"; } # Loop over all sub-sections. Note that we have to work backwards # from the end of the batch since the sections use original line # numbers, and the line numbers change as we go. while ( my $section = pop @{$rsections} ) { my ( $nbeg, $nend ) = @{$section}; $self->recombine_section_loop( { _ri_beg => $ri_beg, _ri_end => $ri_end, _nbeg => $nbeg, _nend => $nend, _rjoint => \@joint, _rbond_strength_to_go => $rbond_strength_to_go, _has_terminal_semicolon => $has_terminal_semicolon, } ); } return; } ## end sub recombine_breakpoints sub recombine_section_loop { my ( $self, $rhash ) = @_; # Recombine breakpoints for one section of lines in the current batch # Given: # $ri_beg, $ri_end = ref to arrays with token indexes of the first # and last line # $nbeg, $nend = line numbers bounding this section # $rjoint = ref to array of good joining tokens per line # Update: $ri_beg, $ri_end, $rjoint if lines are joined # Returns: # nothing #------------- # Definitions: #------------- # $rhash = { # _ri_beg = ref to array with starting token index by line # _ri_end = ref to array with ending token index by line # _nbeg = first line number of this section # _nend = last line number of this section # _rjoint = ref to array of good joining tokens for each line # _rbond_strength_to_go = array of bond strengths # _has_terminal_semicolon = true if last line of batch has ';' # _num_freeze = fixed number of lines at end of this batch # _optimization_on = true during final optimization loop # _num_compares = total number of line compares made so far # _pair_list = list of line pairs in optimal search order # }; #------------- # How it works #------------- # We are working with a sequence of output lines and looking at # each pair. We must decide if it is better to join each of # these line pairs. # The brute force method is to loop through all line pairs and # join the best possible pair, as determined by either some # logical criterion or by the maximum 'bond strength' assigned # to the joining token. Then keep doing this until there are # no remaining line pairs to join. # This works, but a problem is that it can theoretically take # on the order of N^2 comparisons in some pathological cases. # This can require an excessive amount of run time. # We can avoid excessive run time by conceptually dividing the # work into two phases. In the first phase we make any joints # required by user settings or logic other than the strength of # joints. In the second phase we make any remaining joints # based on strengths. To do this optimally, we do a preliminary # sort on joint strengths and always loop in that order. That # way, we can stop a search on the first joint strength because # it will be the maximum. # This method is very fast, requiring no more than 3*N line # comparisons, where N is the number of lines (see below). my $ri_beg = $rhash->{_ri_beg}; my $ri_end = $rhash->{_ri_end}; # Line index range of this section: my $nbeg = $rhash->{_nbeg}; # stays constant my $nend = $rhash->{_nend}; # will decrease # $nmax_batch = starting number of lines in the full batch # $num_freeze = number of lines following this section to leave alone my $nmax_batch = @{$ri_end} - 1; $rhash->{_num_freeze} = $nmax_batch - $nend; # Setup the list of line pairs to test. This stores the following # values for each line pair: # [ $n=index of the second line of the pair, $bs=bond strength] my @pair_list; my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go}; foreach my $n ( $nbeg + 1 .. $nend ) { my $iend_1 = $ri_end->[ $n - 1 ]; my $ibeg_2 = $ri_beg->[$n]; my $bs_tweak = 0; if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 } my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak; push @pair_list, [ $n, $bs ]; } # Any order for testing is possible, but optimization is only possible # if we sort the line pairs on decreasing joint strength. @pair_list = sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list; $rhash->{_rpair_list} = \@pair_list; #---------------- # Iteration limit #---------------- # This is now a very fast loop which runs in O(n) time, but a # check on total number of iterations is retained to guard # against future programming errors. # Most cases require roughly 1 comparison per line pair (1 full pass). # The upper bound is estimated to be about 3 comparisons per line pair # unless optimization is deactivated. The approximate breakdown is: # 1 pass with 1 compare per joint to do any special cases, plus # 1 pass with up to 2 compares per joint in optimization mode # The most extreme cases in my collection are: # camel1.t - needs 2.7 compares per line (12 without optimization) # ternary.t - needs 2.8 compares per line (12 without optimization) # c206 - needs 3.3 compares per line, found with random testing # So a value of MAX_COMPARE_RATIO = 4 looks like an upper bound as # long as optimization is used. A value of 20 should allow all code to # pass even if optimization is turned off for testing. use constant MAX_COMPARE_RATIO => DEVEL_MODE ? 4 : 20; my $num_pairs = $nend - $nbeg + 1; my $max_compares = MAX_COMPARE_RATIO * $num_pairs; # Always start with optimization off $rhash->{_num_compares} = 0; $rhash->{_optimization_on} = 0; $rhash->{_ix_best_last} = 0; #-------------------------------------------- # loop until there are no more recombinations #-------------------------------------------- my $nmax_last = $nmax_batch + 1; while (1) { # Stop when the number of lines in the batch does not decrease $nmax_batch = @{$ri_end} - 1; if ( $nmax_batch >= $nmax_last ) { last; } $nmax_last = $nmax_batch; #----------------------------------------- # inner loop to find next best combination #----------------------------------------- $self->recombine_inner_loop($rhash); # Iteration limit check: if ( $rhash->{_num_compares} > $max_compares ) { # See note above; should only get here on a programming error if (DEVEL_MODE) { my $ibeg = $ri_beg->[$nbeg]; my $Kbeg = $K_to_go[$ibeg]; my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_]; Fault(<<EOM); inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno EOM } last; } } ## end iteration loop if (DEBUG_RECOMBINE) { my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs; print {*STDOUT} "exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n"; } return; } ## end sub recombine_section_loop sub recombine_inner_loop { my ( $self, $rhash ) = @_; # This is the inner loop of the recombine operation. We look at all of # the remaining joints in this section and select the best joint to be # recombined. If a recombination is made, the number of lines # in this section will be reduced by one. # Returns: nothing my $rK_weld_right = $self->[_rK_weld_right_]; my $rK_weld_left = $self->[_rK_weld_left_]; my $ri_beg = $rhash->{_ri_beg}; my $ri_end = $rhash->{_ri_end}; my $nbeg = $rhash->{_nbeg}; my $rjoint = $rhash->{_rjoint}; my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go}; my $rpair_list = $rhash->{_rpair_list}; # This will remember the best joint: my $n_best = 0; my $bs_best = 0.; my $ix_best = 0; my $num_bs = 0; # The range of lines in this group is $nbeg to $nstop my $nmax = @{$ri_end} - 1; my $nstop = $nmax - $rhash->{_num_freeze}; my $num_joints = $nstop - $nbeg; # Turn off optimization if just two joints remain to allow # special two-line logic to be checked (c193) if ( $rhash->{_optimization_on} && $num_joints <= 2 ) { $rhash->{_optimization_on} = 0; } # Start where we ended the last search my $ix_start = $rhash->{_ix_best_last}; # Keep the starting index in bounds $ix_start = max( 0, $ix_start ); # Make a search order list which cycles around to visit # all line pairs. my $ix_max = @{$rpair_list} - 1; my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 ); my $ix_last = $ix_list[-1]; #------------------------- # loop over all line pairs #------------------------- my $incomplete_loop; foreach my $ix (@ix_list) { my $item = $rpair_list->[$ix]; my ( $n, $bs ) = @{$item}; # This flag will be true if we 'last' out of this loop early. # We cannot turn on optimization if this is true. $incomplete_loop = $ix != $ix_last; # Update the count of the number of times through this inner loop $rhash->{_num_compares}++; #---------------------------------------------------------- # If we join the current pair of lines, # line $n-1 will become the left part of the joined line # line $n will become the right part of the joined line # # Here are Indexes of the endpoint tokens of the two lines: # # -----line $n-1--- | -----line $n----- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 # ^ # | # We want to decide if we should remove the line break # between the tokens at $iend_1 and $ibeg_2 # # We will apply a number of ad-hoc tests to see if joining # here will look ok. The code will just move to the next # pair if the join doesn't look good. If we get through # the gauntlet of tests, the lines will be recombined. #---------------------------------------------------------- # # beginning and ending tokens of the lines we are working on my $ibeg_1 = $ri_beg->[ $n - 1 ]; my $iend_1 = $ri_end->[ $n - 1 ]; my $iend_2 = $ri_end->[$n]; my $ibeg_2 = $ri_beg->[$n]; # The combined line cannot be too long my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 ); next if ( $excess > 0 ); my $type_iend_1 = $types_to_go[$iend_1]; my $type_iend_2 = $types_to_go[$iend_2]; my $type_ibeg_1 = $types_to_go[$ibeg_1]; my $type_ibeg_2 = $types_to_go[$ibeg_2]; DEBUG_RECOMBINE > 1 && do { print {*STDOUT} "RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n"; }; # If line $n is the last line, we set some flags and # do any special checks for it my $this_line_is_semicolon_terminated; if ( $n == $nmax ) { if ( $type_ibeg_2 eq '{' ) { # join isolated ')' and '{' if requested (git #110) if ( $rOpts_cuddled_paren_brace && $type_iend_1 eq '}' && $iend_1 == $ibeg_1 && $ibeg_2 == $iend_2 ) { if ( $tokens_to_go[$iend_1] eq ')' && $tokens_to_go[$ibeg_2] eq '{' ) { $n_best = $n; $ix_best = $ix; last; } } # otherwise, a terminal '{' should stay where it is # unless preceded by a fat comma next if ( $type_iend_1 ne '=>' ); } $this_line_is_semicolon_terminated = $rhash->{_has_terminal_semicolon}; } #---------------------------------------------------------- # Recombine Section 0: # Examine the special token joining this line pair, if any. # Put as many tests in this section to avoid duplicate code # and to make formatting independent of whether breaks are # to the left or right of an operator. #---------------------------------------------------------- my $itok = $rjoint->[$n]; if ($itok) { my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n ); next if ( !$ok_0 ); } #---------------------------------------------------------- # Recombine Section 1: # Join welded nested containers immediately #---------------------------------------------------------- if ( $total_weld_count && ( $type_sequence_to_go[$iend_1] && defined( $rK_weld_right->{ $K_to_go[$iend_1] } ) || $type_sequence_to_go[$ibeg_2] && defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) ) ) { $n_best = $n; $ix_best = $ix; last; } #---------------------------------------------------------- # Recombine Section 2: # Examine token at $iend_1 (right end of first line of pair) #---------------------------------------------------------- my ( $ok_2, $skip_Section_3 ) = recombine_section_2( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ); next if ( !$ok_2 ); #---------------------------------------------------------- # Recombine Section 3: # Examine token at $ibeg_2 (left end of second line of pair) #---------------------------------------------------------- # Join lines identified above as capable of # causing an outdented line with leading closing paren. # Note that we are skipping the rest of this section # and the rest of the loop to do the join. if ($skip_Section_3) { $forced_breakpoint_to_go[$iend_1] = 0; $n_best = $n; $ix_best = $ix; $incomplete_loop = 1; last; } my ( $ok_3, $bs_tweak ) = recombine_section_3( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ); next if ( !$ok_3 ); #---------------------------------------------------------- # Recombine Section 4: # Combine the lines if we arrive here and it is possible #---------------------------------------------------------- # honor hard breakpoints next if ( $forced_breakpoint_to_go[$iend_1] ); if (DEVEL_MODE) { # This fault can only occur if an array index error has been # introduced by a recent programming change. my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak; if ( $bs_check != $bs ) { Fault(<<EOM); bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n EOM } } # Require a few extra spaces before recombining lines if we # are at an old breakpoint unless this is a simple list or # terminal line. The goal is to avoid oscillating between # two quasi-stable end states. For example this snippet # caused problems: ## my $this = ## bless { ## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]" ## }, ## $type; next if ( $old_breakpoint_to_go[$iend_1] && !$this_line_is_semicolon_terminated && $n < $nmax && $excess + 4 > 0 && $type_iend_2 ne ',' ); # do not recombine if we would skip in indentation levels if ( $n < $nmax ) { my $if_next = $ri_beg->[ $n + 1 ]; next if ( $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2] && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next] # but an isolated 'if (' is undesirable && !( $n == 1 && $iend_1 - $ibeg_1 <= 2 && $type_ibeg_1 eq 'k' && $tokens_to_go[$ibeg_1] eq 'if' && $tokens_to_go[$iend_1] ne '(' ) ); } ## OLD: honor no-break's ## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257 # remember the pair with the greatest bond strength if ( !$n_best ) { # First good joint ... $n_best = $n; $ix_best = $ix; $bs_best = $bs; $num_bs = 1; # In optimization mode: stop on the first acceptable joint # because we already know it has the highest strength if ( $rhash->{_optimization_on} == 1 ) { last; } } else { # Second and later joints .. $num_bs++; # save maximum strength; in case of a tie select min $n if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) { $n_best = $n; $ix_best = $ix; $bs_best = $bs; } } } ## end loop over all line pairs #--------------------------------------------------- # recombine the pair with the greatest bond strength #--------------------------------------------------- if ($n_best) { DEBUG_RECOMBINE > 1 && print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n"; splice @{$ri_beg}, $n_best, 1; splice @{$ri_end}, $n_best - 1, 1; splice @{$rjoint}, $n_best, 1; splice @{$rpair_list}, $ix_best, 1; # Update the line indexes in the pair list: # Old $n values greater than the best $n decrease by 1 # because of the splice we just did. foreach my $item ( @{$rpair_list} ) { my $n_old = $item->[0]; if ( $n_old > $n_best ) { $item->[0] -= 1 } } # Store the index of this location for starting the next search. # We must subtract 1 to get an updated index because the splice # above just removed the best pair. # BUT CAUTION: if this is the first pair in the pair list, then # this produces an invalid index. So this index must be tested # before use in the next pass through the outer loop. $rhash->{_ix_best_last} = $ix_best - 1; # Turn on optimization if ... if ( # it is not already on, and !$rhash->{_optimization_on} # we have not taken a shortcut to get here, and && !$incomplete_loop # we have seen a good break on strength, and && $num_bs ) { # To deactivate optimization for testing purposes, the next # line can be commented out. This will increase run time. $rhash->{_optimization_on} = 1; if (DEBUG_RECOMBINE) { my $num_compares = $rhash->{_num_compares}; my $pair_count = @ix_list; print {*STDOUT} "Entering optimization phase at $num_compares compares, pair count = $pair_count\n"; } } } return; } ## end sub recombine_inner_loop sub recombine_section_0 { my ( $itok, $ri_beg, $ri_end, $n ) = @_; # Recombine Section 0: # Examine special candidate joining token $itok # Given: # $itok = index of token at a possible join of lines $n-1 and $n # Return: # true => ok to combine # false => do not combine lines # Here are Indexes of the endpoint tokens of the two lines: # # -----line $n-1--- | -----line $n----- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 # ^ ^ # | | # ------------$itok is one of these tokens # Put as many tests in this section to avoid duplicate code # and to make formatting independent of whether breaks are # to the left or right of an operator. my $nmax = @{$ri_end} - 1; my $ibeg_1 = $ri_beg->[ $n - 1 ]; my $iend_1 = $ri_end->[ $n - 1 ]; my $ibeg_2 = $ri_beg->[$n]; my $iend_2 = $ri_end->[$n]; if ($itok) { my $type = $types_to_go[$itok]; if ( $type eq ':' ) { # do not join at a colon unless it disobeys the # break request if ( $itok eq $iend_1 ) { return unless $want_break_before{$type}; } else { return if $want_break_before{$type}; } } ## end if ':' # handle math operators + - * / elsif ( $is_math_op{$type} ) { # Combine these lines if this line is a single # number, or if it is a short term with same # operator as the previous line. For example, in # the following code we will combine all of the # short terms $A, $B, $C, $D, $E, $F, together # instead of leaving them one per line: # my $time = # $A * $B * $C * $D * $E * $F * # ( 2. * $eps * $sigma * $area ) * # ( 1. / $tcold**3 - 1. / $thot**3 ); # This can be important in math-intensive code. my $good_combo; my $itokp = min( $inext_to_go[$itok], $iend_2 ); my $itokpp = min( $inext_to_go[$itokp], $iend_2 ); my $itokm = max( iprev_to_go($itok), $ibeg_1 ); my $itokmm = max( iprev_to_go($itokm), $ibeg_1 ); # check for a number on the right if ( $types_to_go[$itokp] eq 'n' ) { # ok if nothing else on right if ( $itokp == $iend_2 ) { $good_combo = 1; } else { # look one more token to right.. # okay if math operator or some termination $good_combo = ( ( $itokpp == $iend_2 ) && $is_math_op{ $types_to_go[$itokpp] } ) || $types_to_go[$itokpp] =~ /^[#,;]$/; } } # check for a number on the left if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) { # okay if nothing else to left if ( $itokm == $ibeg_1 ) { $good_combo = 1; } # otherwise look one more token to left else { # okay if math operator, comma, or assignment $good_combo = ( $itokmm == $ibeg_1 ) && ( $is_math_op{ $types_to_go[$itokmm] } || $types_to_go[$itokmm] =~ /^[,]$/ || $is_assignment{ $types_to_go[$itokmm] } ); } } # look for a single short token either side of the # operator if ( !$good_combo ) { # Slight adjustment factor to make results # independent of break before or after operator # in long summed lists. (An operator and a # space make two spaces). my $two = ( $itok eq $iend_1 ) ? 2 : 0; $good_combo = # numbers or id's on both sides of this joint $types_to_go[$itokp] =~ /^[in]$/ && $types_to_go[$itokm] =~ /^[in]$/ # one of the two lines must be short: && ( ( # no more than 2 nonblank tokens right # of joint $itokpp == $iend_2 # short && token_sequence_length( $itokp, $iend_2 ) < $two + $rOpts_short_concatenation_item_length ) || ( # no more than 2 nonblank tokens left of # joint $itokmm == $ibeg_1 # short && token_sequence_length( $ibeg_1, $itokm ) < 2 - $two + $rOpts_short_concatenation_item_length ) ) # keep pure terms; don't mix +- with */ && !( $is_plus_minus{$type} && ( $is_mult_div{ $types_to_go[$itokmm] } || $is_mult_div{ $types_to_go[$itokpp] } ) ) && !( $is_mult_div{$type} && ( $is_plus_minus{ $types_to_go[$itokmm] } || $is_plus_minus{ $types_to_go[$itokpp] } ) ) ; } # it is also good to combine if we can reduce to 2 # lines if ( !$good_combo ) { # index on other line where same token would be # in a long chain. my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1; $good_combo = $n == 2 && $n == $nmax && $types_to_go[$iother] ne $type; } return unless ($good_combo); } ## end math elsif ( $is_amp_amp{$type} ) { ##TBD } ## end &&, || elsif ( $is_assignment{$type} ) { ##TBD } else { ## ok - not a special type } ## end assignment } # ok to combine lines return 1; } ## end sub recombine_section_0 sub recombine_section_2 { my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_; # Recombine Section 2: # Examine token at $iend_1 (right end of first line of pair) # Here are Indexes of the endpoint tokens of the two lines: # # -----line $n-1--- | -----line $n----- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 # ^ # | # -----Section 2 looks at this token # Returns: # (nothing) => do not join lines # 1, skip_Section_3 => ok to join lines # $skip_Section_3 is a flag for skipping the next section my $skip_Section_3 = 0; my $nmax = @{$ri_end} - 1; my $ibeg_1 = $ri_beg->[ $n - 1 ]; my $iend_1 = $ri_end->[ $n - 1 ]; my $iend_2 = $ri_end->[$n]; my $ibeg_2 = $ri_beg->[$n]; my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; my $ibeg_nmax = $ri_beg->[$nmax]; my $type_iend_1 = $types_to_go[$iend_1]; my $type_iend_2 = $types_to_go[$iend_2]; my $type_ibeg_1 = $types_to_go[$ibeg_1]; my $type_ibeg_2 = $types_to_go[$ibeg_2]; # an isolated '}' may join with a ';' terminated segment if ( $type_iend_1 eq '}' ) { # Check for cases where combining a semicolon terminated # statement with a previous isolated closing paren will # allow the combined line to be outdented. This is # generally a good move. For example, we can join up # the last two lines here: # ( # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, # $size, $atime, $mtime, $ctime, $blksize, $blocks # ) # = stat($file); # # to get: # ( # $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, # $size, $atime, $mtime, $ctime, $blksize, $blocks # ) = stat($file); # # which makes the parens line up. # # Another example, from Joe Matarazzo, probably looks best # with the 'or' clause appended to the trailing paren: # $self->some_method( # PARAM1 => 'foo', # PARAM2 => 'bar' # ) or die "Some_method didn't work"; # # But we do not want to do this for something like the -lp # option where the paren is not outdentable because the # trailing clause will be far to the right. # # The logic here is synchronized with the logic in sub # sub get_final_indentation, which actually does # the outdenting. # my $combine_ok = $this_line_is_semicolon_terminated # only one token on last line && $ibeg_1 == $iend_1 # must be structural paren && $tokens_to_go[$iend_1] eq ')' # style must allow outdenting, && !$closing_token_indentation{')'} # but leading colons probably line up with a # previous colon or question (count could be wrong). && $type_ibeg_2 ne ':' # only one step in depth allowed. this line must not # begin with a ')' itself. && ( $nesting_depth_to_go[$iend_1] == $nesting_depth_to_go[$iend_2] + 1 ); # But only combine leading '&&', '||', if no previous && || : # seen. This count includes these tokens at all levels. The # idea is that seeing these at any level can make it hard to read # formatting if we recombine. if ( $is_amp_amp{$type_ibeg_2} ) { foreach my $n_t ( reverse( 0 .. $n - 2 ) ) { my $ibeg_t = $ri_beg->[$n_t]; my $type_t = $types_to_go[$ibeg_t]; if ( $is_amp_amp{$type_t} || $type_t eq ':' ) { $combine_ok = 0; last; } } } $skip_Section_3 ||= $combine_ok; # YVES patch 2 of 2: # Allow cuddled eval chains, like this: # eval { # #STUFF; # 1; # return true # } or do { # #handle error # }; # This patch works together with a patch in # setting adjusted indentation (where the closing eval # brace is outdented if possible). # The problem is that an 'eval' block has continuation # indentation and it looks better to undo it in some # cases. If we do not use this patch we would get: # eval { # #STUFF; # 1; # return true # } # or do { # #handle error # }; # The alternative, for uncuddled style, is to create # a patch in get_final_indentation which undoes # the indentation of a leading line like 'or do {'. # This doesn't work well with -icb through if ( $block_type_to_go[$iend_1] && $rOpts_brace_follower_vertical_tightness > 0 && ( # -bfvt=1, allow cuddled eval chains [default] ( $tokens_to_go[$iend_2] eq '{' && $block_type_to_go[$iend_1] eq 'eval' && !ref( $leading_spaces_to_go[$iend_1] ) && !$rOpts_indent_closing_brace ) # -bfvt=2, allow most brace followers [part of git #110] || ( $rOpts_brace_follower_vertical_tightness > 1 && $ibeg_1 == $iend_1 ) ) && ( ( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ ) || ( $type_ibeg_2 eq 'k' && $is_and_or{ $tokens_to_go[$ibeg_2] } ) || $is_if_unless{ $tokens_to_go[$ibeg_2] } ) ) { $skip_Section_3 ||= 1; } return unless ( $skip_Section_3 # handle '.' and '?' specially below || ( $type_ibeg_2 =~ /^[\.\?]$/ ) # fix for c054 (unusual -pbp case) || $type_ibeg_2 eq '==' ); } elsif ( $type_iend_1 eq '{' ) { # YVES # honor breaks at opening brace # Added to prevent recombining something like this: # } || eval { package main; return if ( $forced_breakpoint_to_go[$iend_1] ); } # do not recombine lines with ending &&, ||, elsif ( $is_amp_amp{$type_iend_1} ) { return unless ( $want_break_before{$type_iend_1} ); } # Identify and recombine a broken ?/: chain elsif ( $type_iend_1 eq '?' ) { # Do not recombine different levels return if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] ); # do not recombine unless next line ends in : return unless $type_iend_2 eq ':'; } # for lines ending in a comma... elsif ( $type_iend_1 eq ',' ) { # Do not recombine at comma which is following the # input bias. # NOTE: this could be controlled by a special flag, # but it seems to work okay. return if ( $old_breakpoint_to_go[$iend_1] ); # An isolated '},' may join with an identifier + ';' # This is useful for the class of a 'bless' statement # (bless.t) if ( $type_ibeg_1 eq '}' && $type_ibeg_2 eq 'i' ) { return unless ( ( $ibeg_1 == ( $iend_1 - 1 ) ) && ( $iend_2 == ( $ibeg_2 + 1 ) ) && $this_line_is_semicolon_terminated ); # override breakpoint $forced_breakpoint_to_go[$iend_1] = 0; } # but otherwise .. else { # do not recombine after a comma unless this will # leave just 1 more line return if ( $n + 1 < $nmax ); # do not recombine if there is a change in # indentation depth return if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] ); # do not recombine a "complex expression" after a # comma. "complex" means no parens. my $saw_paren; foreach my $ii ( $ibeg_2 .. $iend_2 ) { if ( $tokens_to_go[$ii] eq '(' ) { $saw_paren = 1; last; } } return if $saw_paren; } } # opening paren.. elsif ( $type_iend_1 eq '(' ) { # No longer doing this } elsif ( $type_iend_1 eq ')' ) { # No longer doing this } # keep a terminal for-semicolon elsif ( $type_iend_1 eq 'f' ) { return; } # if '=' at end of line ... elsif ( $is_assignment{$type_iend_1} ) { # keep break after = if it was in input stream # this helps prevent 'blinkers' return if ( $old_breakpoint_to_go[$iend_1] # don't strand an isolated '=' && $iend_1 != $ibeg_1 ); my $is_short_quote = ( $type_ibeg_2 eq 'Q' && $ibeg_2 == $iend_2 && token_sequence_length( $ibeg_2, $ibeg_2 ) < $rOpts_short_concatenation_item_length ); my $is_ternary = ( $type_ibeg_1 eq '?' && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) ); # always join an isolated '=', a short quote, or if this # will put ?/: at start of adjacent lines if ( $ibeg_1 != $iend_1 && !$is_short_quote && !$is_ternary ) { my $combine_ok = ( ( # unless we can reduce this to two lines $nmax < $n + 2 # or three lines, the last with a leading # semicolon || ( $nmax == $n + 2 && $types_to_go[$ibeg_nmax] eq ';' ) # or the next line ends with a here doc || $type_iend_2 eq 'h' # or the next line ends in an open paren or # brace and the break hasn't been forced # [dima.t] || (!$forced_breakpoint_to_go[$iend_1] && $type_iend_2 eq '{' ) ) # do not recombine if the two lines might align # well this is a very approximate test for this && ( # RT#127633 - the leading tokens are not # operators ( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] ) # or they are different || ( $ibeg_3 >= 0 && $type_ibeg_2 ne $types_to_go[$ibeg_3] ) ) ); return if ( !$combine_ok ); if ( # Recombine if we can make two lines $nmax >= $n + 2 # -lp users often prefer this: # my $title = function($env, $env, $sysarea, # "bubba Borrower Entry"); # so we will recombine if -lp is used we have # ending comma && !( $ibeg_3 > 0 && ref( $leading_spaces_to_go[$ibeg_3] ) && $type_iend_2 eq ',' ) ) { # otherwise, scan the rhs line up to last token for # complexity. Note that we are not counting the last token # in case it is an opening paren. my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ); return if ( !$ok ); } } if ( $tokens_to_go[$ibeg_2] !~ /^[\{\(\[]$/ ) { $forced_breakpoint_to_go[$iend_1] = 0; } } # for keywords.. elsif ( $type_iend_1 eq 'k' ) { # make major control keywords stand out # (recombine.t) return if ( #/^(last|next|redo|return)$/ $is_last_next_redo_return{ $tokens_to_go[$iend_1] } # but only if followed by multiple lines && $n < $nmax ); if ( $is_and_or{ $tokens_to_go[$iend_1] } ) { return unless $want_break_before{ $tokens_to_go[$iend_1] }; } } elsif ( $type_iend_1 eq '.' ) { # NOTE: the logic here should match that of section 3 so that # line breaks are independent of choice of break before or after. # It would be nice to combine them in section 0, but the # special junction case ') .' makes that difficult. # This section added to fix issues c172, c174. my $i_next_nonblank = $ibeg_2; my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] - $summed_lengths_to_go[$ibeg_1]; my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] - $summed_lengths_to_go[$ibeg_2]; my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) ); my $combine_ok = ( # ... unless there is just one and we can reduce # this to two lines if we do. For example, this # # # $bodyA .= # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' # # looks better than this: # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' . # '$args .= $pat;' # check for 2 lines, not in a long broken '.' chain ( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 ) # ... or this would strand a short quote , like this # "some long quote" . # "\n"; || ( $types_to_go[$i_next_nonblank] eq 'Q' && $i_next_nonblank >= $iend_2 - 2 && $token_lengths_to_go[$i_next_nonblank] < $rOpts_short_concatenation_item_length # additional constraints to fix c167 && ( $types_to_go[$iend_1_minus] ne 'Q' || $summed_len_2 < $summed_len_1 ) ) ); return if ( !$combine_ok ); } else { ## ok - not a special type } return ( 1, $skip_Section_3 ); } ## end sub recombine_section_2 sub simple_rhs { my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_; # Scan line ibeg_2 to $iend_2 up to last token for complexity. # We are not counting the last token in case it is an opening paren. # Return: # true if rhs is simple, ok to recombine # false otherwise my $tv = 0; my $depth = $nesting_depth_to_go[$ibeg_2]; foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) { if ( $nesting_depth_to_go[$i] != $depth ) { $tv++; last if ( $tv > 1 ); } $depth = $nesting_depth_to_go[$i]; } # ok to recombine if no level changes before # last token if ( $tv > 0 ) { # otherwise, do not recombine if more than # two level changes. return if ( $tv > 1 ); # check total complexity of the two # adjacent lines that will occur if we do # this join my $istop = ( $n < $nmax ) ? $ri_end->[ $n + 1 ] : $iend_2; foreach my $i ( $iend_2 .. $istop ) { if ( $nesting_depth_to_go[$i] != $depth ) { $tv++; last if ( $tv > 2 ); } $depth = $nesting_depth_to_go[$i]; } # do not recombine if total is more than 2 # level changes return if ( $tv > 2 ); } return 1; } ## end sub simple_rhs sub recombine_section_3 { my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_; # Recombine Section 3: # Examine token at $ibeg_2 (right end of first line of pair) # Here are Indexes of the endpoint tokens of the two lines: # # -----line $n-1--- | -----line $n----- # $ibeg_1 $iend_1 | $ibeg_2 $iend_2 # ^ # | # -----Section 3 looks at this token # Returns: # (nothing) => do not join lines # 1, bs_tweak => ok to join lines # $bstweak is a small tolerance to add to bond strengths my $bs_tweak = 0; my $nmax = @{$ri_end} - 1; my $ibeg_1 = $ri_beg->[ $n - 1 ]; my $iend_1 = $ri_end->[ $n - 1 ]; my $iend_2 = $ri_end->[$n]; my $ibeg_2 = $ri_beg->[$n]; my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1; my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1; my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1; my $ibeg_nmax = $ri_beg->[$nmax]; my $type_iend_1 = $types_to_go[$iend_1]; my $type_iend_2 = $types_to_go[$iend_2]; my $type_ibeg_1 = $types_to_go[$ibeg_1]; my $type_ibeg_2 = $types_to_go[$ibeg_2]; # handle lines with leading &&, || if ( $is_amp_amp{$type_ibeg_2} ) { # ok to recombine if it follows a ? or : # and is followed by an open paren.. my $ok = ( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' ) # or is followed by a ? or : at same depth # # We are looking for something like this. We can # recombine the && line with the line above to make the # structure more clear: # return # exists $G->{Attr}->{V} # && exists $G->{Attr}->{V}->{$u} # ? %{ $G->{Attr}->{V}->{$u} } # : (); # # We should probably leave something like this alone: # return # exists $G->{Attr}->{E} # && exists $G->{Attr}->{E}->{$u} # && exists $G->{Attr}->{E}->{$u}->{$v} # ? %{ $G->{Attr}->{E}->{$u}->{$v} } # : (); # so that we either have all of the &&'s (or ||'s) # on one line, as in the first example, or break at # each one as in the second example. However, it # sometimes makes things worse to check for this because # it prevents multiple recombinations. So this is not done. || ( $ibeg_3 >= 0 && $is_ternary{ $types_to_go[$ibeg_3] } && $nesting_depth_to_go[$ibeg_3] == $nesting_depth_to_go[$ibeg_2] ); # Combine a trailing && term with an || term: fix for # c060 This is rare but can happen. $ok ||= 1 if ( $ibeg_3 < 0 && $type_ibeg_2 eq '&&' && $type_ibeg_1 eq '||' && $nesting_depth_to_go[$ibeg_2] == $nesting_depth_to_go[$ibeg_1] ); return if !$ok && $want_break_before{$type_ibeg_2}; $forced_breakpoint_to_go[$iend_1] = 0; # tweak the bond strength to give this joint priority # over ? and : $bs_tweak = 0.25; } # Identify and recombine a broken ?/: chain elsif ( $type_ibeg_2 eq '?' ) { # Do not recombine different levels my $lev = $levels_to_go[$ibeg_2]; return if ( $lev ne $levels_to_go[$ibeg_1] ); # Do not recombine a '?' if either next line or # previous line does not start with a ':'. The reasons # are that (1) no alignment of the ? will be possible # and (2) the expression is somewhat complex, so the # '?' is harder to see in the interior of the line. my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':'; my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':'; return unless ( $follows_colon || $precedes_colon ); # we will always combining a ? line following a : line if ( !$follows_colon ) { # ...otherwise recombine only if it looks like a # chain. we will just look at a few nearby lines # to see if this looks like a chain. my $local_count = 0; foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) { $local_count++ if $ii >= 0 && $types_to_go[$ii] eq ':' && $levels_to_go[$ii] == $lev; } return if ( $local_count <= 1 ); } $forced_breakpoint_to_go[$iend_1] = 0; } # do not recombine lines with leading '.' elsif ( $type_ibeg_2 eq '.' ) { my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 ); my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] - $summed_lengths_to_go[$ibeg_1]; my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] - $summed_lengths_to_go[$ibeg_2]; my $combine_ok = ( # ... unless there is just one and we can reduce # this to two lines if we do. For example, this # # # $bodyA .= # '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;' # # looks better than this: # $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' # . '$args .= $pat;' ( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 ) # ... or this would strand a short quote , like this # . "some long quote" # . "\n"; || ( $types_to_go[$i_next_nonblank] eq 'Q' && $i_next_nonblank >= $iend_2 - 1 && $token_lengths_to_go[$i_next_nonblank] < $rOpts_short_concatenation_item_length # additional constraints to fix c167 && ( $types_to_go[$iend_1] ne 'Q' # allow a term shorter than the previous term || $summed_len_2 < $summed_len_1 # or allow a short semicolon-terminated term if this # makes two lines (see c169) || ( $n == 2 && $n == $nmax && $this_line_is_semicolon_terminated ) ) ) ); return if ( !$combine_ok ); } # handle leading keyword.. elsif ( $type_ibeg_2 eq 'k' ) { # handle leading "or" if ( $tokens_to_go[$ibeg_2] eq 'or' ) { my $combine_ok = ( $this_line_is_semicolon_terminated && ( $type_ibeg_1 eq '}' || ( # following 'if' or 'unless' or 'or' $type_ibeg_1 eq 'k' && $is_if_unless{ $tokens_to_go[$ibeg_1] } # important: only combine a very simple # or statement because the step below # may have combined a trailing 'and' # with this or, and we do not want to # then combine everything together && ( $iend_2 - $ibeg_2 <= 7 ) ) ) ); return if ( !$combine_ok ); #X: RT #81854 $forced_breakpoint_to_go[$iend_1] = 0 if ( !$old_breakpoint_to_go[$iend_1] ); } # handle leading 'and' and 'xor' elsif ($tokens_to_go[$ibeg_2] eq 'and' || $tokens_to_go[$ibeg_2] eq 'xor' ) { # Decide if we will combine a single terminal 'and' # after an 'if' or 'unless'. # This looks best with the 'and' on the same # line as the 'if': # # $a = 1 # if $seconds and $nu < 2; # # But this looks better as shown: # # $a = 1 # if !$this->{Parents}{$_} # or $this->{Parents}{$_} eq $_; # return unless ( $this_line_is_semicolon_terminated && ( # following 'if' or 'unless' or 'or' $type_ibeg_1 eq 'k' && ( $is_if_unless{ $tokens_to_go[$ibeg_1] } || $tokens_to_go[$ibeg_1] eq 'or' ) ) ); } # handle leading "if" and "unless" elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) { # Combine something like: # next # if ( $lang !~ /${l}$/i ); # into: # next if ( $lang !~ /${l}$/i ); return unless ( $this_line_is_semicolon_terminated # previous line begins with 'and' or 'or' && $type_ibeg_1 eq 'k' && $is_and_or{ $tokens_to_go[$ibeg_1] } ); } # handle all other leading keywords else { # keywords look best at start of lines, # but combine things like "1 while" if ( !$is_assignment{$type_iend_1} ) { return if ( ( $type_iend_1 ne 'k' ) && ( $tokens_to_go[$ibeg_2] ne 'while' ) ); } } } # similar treatment of && and || as above for 'and' and # 'or': NOTE: This block of code is currently bypassed # because of a previous block but is retained for possible # future use. elsif ( $is_amp_amp{$type_ibeg_2} ) { # maybe looking at something like: # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i; return unless ( $this_line_is_semicolon_terminated # previous line begins with an 'if' or 'unless' # keyword && $type_ibeg_1 eq 'k' && $is_if_unless{ $tokens_to_go[$ibeg_1] } ); } # handle line with leading = or similar elsif ( $is_assignment{$type_ibeg_2} ) { return unless ( $n == 1 || $n == $nmax ); return if ( $old_breakpoint_to_go[$iend_1] ); return unless ( # unless we can reduce this to two lines $nmax == 2 # or three lines, the last with a leading semicolon || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' ) # or the next line ends with a here doc || $type_iend_2 eq 'h' # or this is a short line ending in ; || ( $n == $nmax && $this_line_is_semicolon_terminated ) ); $forced_breakpoint_to_go[$iend_1] = 0; } else { ## ok - not a special type } return ( 1, $bs_tweak ); } ## end sub recombine_section_3 } ## end closure recombine_breakpoints sub insert_final_ternary_breaks { my ( $self, $ri_left, $ri_right ) = @_; # Called once per batch to look for and do any final line breaks for # long ternary chains my $nmax = @{$ri_right} - 1; # scan the left and right end tokens of all lines my $i_first_colon = -1; for my $n ( 0 .. $nmax ) { my $il = $ri_left->[$n]; my $ir = $ri_right->[$n]; my $typel = $types_to_go[$il]; my $typer = $types_to_go[$ir]; return if ( $typel eq '?' ); return if ( $typer eq '?' ); if ( $typel eq ':' ) { $i_first_colon = $il; last; } if ( $typer eq ':' ) { $i_first_colon = $ir; last; } } # For long ternary chains, # if the first : we see has its ? is in the interior # of a preceding line, then see if there are any good # breakpoints before the ?. if ( $i_first_colon > 0 ) { my $i_question = $mate_index_to_go[$i_first_colon]; if ( defined($i_question) && $i_question > 0 ) { my @insert_list; foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) { my $token = $tokens_to_go[$ii]; my $type = $types_to_go[$ii]; # For now, a good break is either a comma or, # in a long chain, a 'return'. # Patch for RT #126633: added the $nmax>1 check to avoid # breaking after a return for a simple ternary. For longer # chains the break after return allows vertical alignment, so # it is still done. So perltidy -wba='?' will not break # immediately after the return in the following statement: # sub x { # return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' : # 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'; # } if ( ( $type eq ',' || $type eq 'k' && ( $nmax > 1 && $token eq 'return' ) ) && $self->in_same_container_i( $ii, $i_question ) ) { push @insert_list, $ii; last; } } # insert any new break points if (@insert_list) { $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } } } return; } ## end sub insert_final_ternary_breaks sub insert_breaks_before_list_opening_containers { my ( $self, $ri_left, $ri_right ) = @_; # This routine is called once per batch to implement the parameters # --break-before-hash-brace, etc. # Nothing to do if none of these parameters has been set return unless %break_before_container_types; my $nmax = @{$ri_right} - 1; return if ( $nmax < 0 ); my $rLL = $self->[_rLL_]; my $rbreak_before_container_by_seqno = $self->[_rbreak_before_container_by_seqno_]; my $rK_weld_left = $self->[_rK_weld_left_]; # scan the ends of all lines my @insert_list; for my $n ( 0 .. $nmax ) { my $il = $ri_left->[$n]; my $ir = $ri_right->[$n]; next if ( $ir <= $il ); my $Kl = $K_to_go[$il]; my $Kr = $K_to_go[$ir]; my $Kend = $Kr; my $type_end = $rLL->[$Kr]->[_TYPE_]; # Backup before any side comment if ( $type_end eq '#' ) { $Kend = $self->K_previous_nonblank($Kr); next unless defined($Kend); $type_end = $rLL->[$Kend]->[_TYPE_]; } # Backup to the start of any weld; fix for b1173. if ($total_weld_count) { my $Kend_test = $rK_weld_left->{$Kend}; if ( defined($Kend_test) && $Kend_test > $Kl ) { $Kend = $Kend_test; $Kend_test = $rK_weld_left->{$Kend}; } # Do not break if we did not back up to the start of a weld # (shouldn't happen) next if ( defined($Kend_test) ); } my $token = $rLL->[$Kend]->[_TOKEN_]; next if ( !$is_opening_token{$token} ); next if ( $Kl >= $Kend - 1 ); my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_]; next if ( !defined($seqno) ); # Use the flag which was previously set next unless ( $rbreak_before_container_by_seqno->{$seqno} ); # Install a break before this opening token. my $Kbreak = $self->K_previous_nonblank($Kend); my $ibreak = $Kbreak - $Kl + $il; next if ( $ibreak < $il ); next if ( $nobreak_to_go[$ibreak] ); push @insert_list, $ibreak; } # insert any new break points if (@insert_list) { $self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right ); } return; } ## end sub insert_breaks_before_list_opening_containers sub note_added_semicolon { my ( $self, $line_number ) = @_; $self->[_last_added_semicolon_at_] = $line_number; if ( $self->[_added_semicolon_count_] == 0 ) { $self->[_first_added_semicolon_at_] = $line_number; } $self->[_added_semicolon_count_]++; write_logfile_entry("Added ';' here\n"); return; } ## end sub note_added_semicolon sub note_deleted_semicolon { my ( $self, $line_number ) = @_; $self->[_last_deleted_semicolon_at_] = $line_number; if ( $self->[_deleted_semicolon_count_] == 0 ) { $self->[_first_deleted_semicolon_at_] = $line_number; } $self->[_deleted_semicolon_count_]++; write_logfile_entry("Deleted unnecessary ';' at line $line_number\n"); return; } ## end sub note_deleted_semicolon sub note_embedded_tab { my ( $self, $line_number ) = @_; $self->[_embedded_tab_count_]++; $self->[_last_embedded_tab_at_] = $line_number; if ( !$self->[_first_embedded_tab_at_] ) { $self->[_first_embedded_tab_at_] = $line_number; } if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) { write_logfile_entry("Embedded tabs in quote or pattern\n"); } return; } ## end sub note_embedded_tab use constant DEBUG_CORRECT_LP => 0; sub correct_lp_indentation { # When the -lp option is used, we need to make a last pass through # each line to correct the indentation positions in case they differ # from the predictions. This is necessary because perltidy uses a # predictor/corrector method for aligning with opening parens. The # predictor is usually good, but sometimes stumbles. The corrector # tries to patch things up once the actual opening paren locations # are known. my ( $self, $ri_first, $ri_last ) = @_; # first remove continuation indentation if appropriate my $max_line = @{$ri_first} - 1; #--------------------------------------------------------------------------- # PASS 1: reduce indentation if necessary at any long one-line blocks (c098) #--------------------------------------------------------------------------- # The point is that sub 'starting_one_line_block' made one-line blocks based # on default indentation, not -lp indentation. So some of the one-line # blocks may be too long when given -lp indentation. We will fix that now # if possible, using the list of these closing block indexes. my $ri_starting_one_line_block = $self->[_this_batch_]->[_ri_starting_one_line_block_]; if ( @{$ri_starting_one_line_block} ) { $self->correct_lp_indentation_pass_1( $ri_first, $ri_last, $ri_starting_one_line_block ); } #------------------------------------------------------------------- # PASS 2: look for and fix other problems in each line of this batch #------------------------------------------------------------------- # look at each output line ... foreach my $line ( 0 .. $max_line ) { my $ibeg = $ri_first->[$line]; my $iend = $ri_last->[$line]; # looking at each token in this output line ... foreach my $i ( $ibeg .. $iend ) { # How many space characters to place before this token # for special alignment. Actual padding is done in the # continue block. # looking for next unvisited indentation item ... my $indentation = $leading_spaces_to_go[$i]; # This is just for indentation objects (c098) next unless ( ref($indentation) ); # Visit each indentation object just once next if ( $indentation->get_marked() ); # Mark first visit $indentation->set_marked(1); # Skip indentation objects which do not align with container tokens my $align_seqno = $indentation->get_align_seqno(); next unless ($align_seqno); # Skip a container which is entirely on this line my $Ko = $self->[_K_opening_container_]->{$align_seqno}; my $Kc = $self->[_K_closing_container_]->{$align_seqno}; if ( defined($Ko) && defined($Kc) ) { next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] ); } # Note on flag '$do_not_pad': # We want to avoid a situation like this, where the aligner # inserts whitespace before the '=' to align it with a previous # '=', because otherwise the parens might become mis-aligned in a # situation like this, where the '=' has become aligned with the # previous line, pushing the opening '(' forward beyond where we # want it. # # $mkFloor::currentRoom = ''; # $mkFloor::c_entry = $c->Entry( # -width => '10', # -relief => 'sunken', # ... # ); # # We leave it to the aligner to decide how to do this. if ( $line == 1 && $i == $ibeg ) { $self->[_this_batch_]->[_do_not_pad_] = 1; } #-------------------------------------------- # Now see what the error is and try to fix it #-------------------------------------------- my $closing_index = $indentation->get_closed(); my $predicted_pos = $indentation->get_spaces(); # Find actual position: my $actual_pos; if ( $i == $ibeg ) { # Case 1: token is first character of of batch - table lookup if ( $line == 0 ) { $actual_pos = $predicted_pos; my ( $indent, $offset, $is_leading, $exists ) = get_saved_opening_indentation($align_seqno); if ( defined($indent) ) { # NOTE: we could use '1' here if no space after # opening and '2' if want space; it is hardwired at 1 # like -gnu-style. But it is probably best to leave # this alone because changing it would change # formatting of much existing code without any # significant benefit. $actual_pos = get_spaces($indent) + $offset + 1; } } # Case 2: token starts a new line - use length of previous line else { my $ibegm = $ri_first->[ $line - 1 ]; my $iendm = $ri_last->[ $line - 1 ]; $actual_pos = total_line_length( $ibegm, $iendm ); # follow -pt style ++$actual_pos if ( $types_to_go[ $iendm + 1 ] eq 'b' ); } } # Case 3: $i>$ibeg: token is mid-line - use length to previous token else { $actual_pos = total_line_length( $ibeg, $i - 1 ); # for mid-line token, we must check to see if all # additional lines have continuation indentation, # and remove it if so. Otherwise, we do not get # good alignment. if ( $closing_index > $iend ) { my $ibeg_next = $ri_first->[ $line + 1 ]; if ( $ci_levels_to_go[$ibeg_next] > 0 ) { $self->undo_lp_ci( $line, $i, $closing_index, $ri_first, $ri_last ); } } } # By how many spaces (plus or minus) would we need to increase the # indentation to get alignment with the opening token? my $move_right = $actual_pos - $predicted_pos; if (DEBUG_CORRECT_LP) { my $tok = substr( $tokens_to_go[$i], 0, 8 ); my $avail = $self->get_available_spaces_to_go($ibeg); print "CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n"; } # nothing more to do if no error to correct (gnu2.t) if ( $move_right == 0 ) { $indentation->set_recoverable_spaces($move_right); next; } # Get any collapsed length defined for -xlp my $collapsed_length = $self->[_rcollapsed_length_by_seqno_]->{$align_seqno}; $collapsed_length = 0 unless ( defined($collapsed_length) ); if (DEBUG_CORRECT_LP) { print "CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n"; } # if we have not seen closure for this indentation in this batch, # and do not have a collapsed length estimate, we can only pass on # a request to the vertical aligner if ( $closing_index < 0 && !$collapsed_length ) { $indentation->set_recoverable_spaces($move_right); next; } # If necessary, look ahead to see if there is really any leading # whitespace dependent on this whitespace, and also find the # longest line using this whitespace. Since it is always safe to # move left if there are no dependents, we only need to do this if # we may have dependent nodes or need to move right. my $have_child = $indentation->get_have_child(); my %saw_indentation; my $line_count = 1; $saw_indentation{$indentation} = $indentation; # How far can we move right before we hit the limit? # let $right_margen = the number of spaces that we can increase # the current indentation before hitting the maximum line length. my $right_margin = 0; if ( $have_child || $move_right > 0 ) { $have_child = 0; # include estimated collapsed length for incomplete containers my $max_length = 0; if ( $Kc > $K_to_go[$max_index_to_go] ) { $max_length = $collapsed_length + $predicted_pos; } if ( $i == $ibeg ) { my $length = total_line_length( $ibeg, $iend ); if ( $length > $max_length ) { $max_length = $length } } # look ahead at the rest of the lines of this batch.. foreach my $line_t ( $line + 1 .. $max_line ) { my $ibeg_t = $ri_first->[$line_t]; my $iend_t = $ri_last->[$line_t]; last if ( $closing_index <= $ibeg_t ); # remember all different indentation objects my $indentation_t = $leading_spaces_to_go[$ibeg_t]; $saw_indentation{$indentation_t} = $indentation_t; $line_count++; # remember longest line in the group my $length_t = total_line_length( $ibeg_t, $iend_t ); if ( $length_t > $max_length ) { $max_length = $length_t; } } $right_margin = $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] - $max_length; if ( $right_margin < 0 ) { $right_margin = 0 } } my $first_line_comma_count = grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; my $comma_count = $indentation->get_comma_count(); my $arrow_count = $indentation->get_arrow_count(); # This is a simple approximate test for vertical alignment: # if we broke just after an opening paren, brace, bracket, # and there are 2 or more commas in the first line, # and there are no '=>'s, # then we are probably vertically aligned. We could set # an exact flag in sub break_lists, but this is good # enough. my $indentation_count = keys %saw_indentation; my $is_vertically_aligned = ( $i == $ibeg && $first_line_comma_count > 1 && $indentation_count == 1 && ( $arrow_count == 0 || $arrow_count == $line_count ) ); # Make the move if possible .. if ( # we can always move left $move_right < 0 # -xlp # incomplete container || ( $rOpts_extended_line_up_parentheses && $Kc > $K_to_go[$max_index_to_go] ) || $closing_index < 0 # but we should only move right if we are sure it will # not spoil vertical alignment || ( $comma_count == 0 ) || ( $comma_count > 0 && !$is_vertically_aligned ) ) { my $move = ( $move_right <= $right_margin ) ? $move_right : $right_margin; if (DEBUG_CORRECT_LP) { print "CORRECT_LP for seq=$align_seqno, moving $move spaces\n"; } foreach ( keys %saw_indentation ) { $saw_indentation{$_} ->permanently_decrease_available_spaces( -$move ); } } # Otherwise, record what we want and the vertical aligner # will try to recover it. else { $indentation->set_recoverable_spaces($move_right); } } ## end loop over tokens in a line } ## end loop over lines return; } ## end sub correct_lp_indentation sub correct_lp_indentation_pass_1 { my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_; # So some of the one-line blocks may be too long when given -lp # indentation. We will fix that now if possible, using the list of these # closing block indexes. my @ilist = @{$ri_starting_one_line_block}; return unless (@ilist); my $max_line = @{$ri_first} - 1; my $inext = shift(@ilist); # loop over lines, checking length of each with a one-line block my ( $ibeg, $iend ); foreach my $line ( 0 .. $max_line ) { $iend = $ri_last->[$line]; next if ( $inext > $iend ); $ibeg = $ri_first->[$line]; # This is just for lines with indentation objects (c098) my $excess = ref( $leading_spaces_to_go[$ibeg] ) ? $self->excess_line_length( $ibeg, $iend ) : 0; if ( $excess > 0 ) { my $available_spaces = $self->get_available_spaces_to_go($ibeg); if ( $available_spaces > 0 ) { my $delete_want = min( $available_spaces, $excess ); my $deleted_spaces = $self->reduce_lp_indentation( $ibeg, $delete_want ); $available_spaces = $self->get_available_spaces_to_go($ibeg); } } # skip forward to next one-line block to check while (@ilist) { $inext = shift @ilist; next if ( $inext <= $iend ); last if ( $inext > $iend ); } last if ( $inext <= $iend ); } return; } ## end sub correct_lp_indentation_pass_1 sub undo_lp_ci { # If there is a single, long parameter within parens, like this: # # $self->command( "/msg " # . $infoline->chan # . " You said $1, but did you know that it's square was " # . $1 * $1 . " ?" ); # # we can remove the continuation indentation of the 2nd and higher lines # to achieve this effect, which is more pleasing: # # $self->command("/msg " # . $infoline->chan # . " You said $1, but did you know that it's square was " # . $1 * $1 . " ?"); my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_; my $max_line = @{$ri_first} - 1; # must be multiple lines return if ( $max_line <= $line_open ); my $lev_start = $levels_to_go[$i_start]; my $ci_start_plus = 1 + $ci_levels_to_go[$i_start]; # see if all additional lines in this container have continuation # indentation my $line_1 = 1 + $line_open; my $n = $line_open; while ( ++$n <= $max_line ) { my $ibeg = $ri_first->[$n]; my $iend = $ri_last->[$n]; if ( $ibeg eq $closing_index ) { $n--; last } return if ( $lev_start != $levels_to_go[$ibeg] ); return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] ); last if ( $closing_index <= $iend ); } # we can reduce the indentation of all continuation lines my $continuation_line_count = $n - $line_open; @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = (0) x ($continuation_line_count); @leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] = @reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ]; return; } ## end sub undo_lp_ci ################################################ # CODE SECTION 10: Code to break long statements ################################################ use constant DEBUG_BREAK_LINES => 0; sub break_long_lines { #----------------------------------------------------------- # Break a batch of tokens into lines which do not exceed the # maximum line length. #----------------------------------------------------------- my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_; # Input parameters: # $saw_good_break - a flag set by break_lists # $rcolon_list - ref to a list of all the ? and : tokens in the batch, # in order. # $rbond_strength_bias - small bond strength bias values set by break_lists # Output: returns references to the arrays: # @i_first # @i_last # which contain the indexes $i of the first and last tokens on each # line. # In addition, the array: # $forced_breakpoint_to_go[$i] # may be updated to be =1 for any index $i after which there must be # a break. This signals later routines not to undo the breakpoint. # Method: # This routine is called if a statement is longer than the maximum line # length, or if a preliminary scanning located desirable break points. # Sub break_lists has already looked at these tokens and set breakpoints # (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for # example after commas, after opening parens, and before closing parens). # This routine will honor these breakpoints and also add additional # breakpoints as necessary to keep the line length below the maximum # requested. It bases its decision on where the 'bond strength' is # lowest. my @i_first = (); # the first index to output my @i_last = (); # the last index to output my @i_colon_breaks = (); # needed to decide if we have to break at ?'s if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 } # Get the 'bond strengths' between tokens my $rbond_strength_to_go = $self->set_bond_strengths(); # Add any comma bias set by break_lists if ( @{$rbond_strength_bias} ) { foreach my $item ( @{$rbond_strength_bias} ) { my ( $ii, $bias ) = @{$item}; if ( $ii >= 0 && $ii <= $max_index_to_go ) { $rbond_strength_to_go->[$ii] += $bias; } else { if (DEVEL_MODE) { my $KK = $K_to_go[0]; my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; Fault( "Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n" ); } } } } my $imin = 0; my $imax = $max_index_to_go; if ( $types_to_go[$imin] eq 'b' ) { $imin++ } if ( $types_to_go[$imax] eq 'b' ) { $imax-- } my $i_begin = $imin; my $last_break_strength = NO_BREAK; my $i_last_break = -1; my $line_count = 0; # see if any ?/:'s are in order my $colons_in_order = 1; my $last_tok = EMPTY_STRING; foreach ( @{$rcolon_list} ) { if ( $_ eq $last_tok ) { $colons_in_order = 0; last } $last_tok = $_; } # This is a sufficient but not necessary condition for colon chain my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 ); #------------------------------------------ # BEGINNING of main loop to set breakpoints # Keep iterating until we reach the end #------------------------------------------ while ( $i_begin <= $imax ) { #------------------------------------------------------------------ # Find the best next breakpoint based on token-token bond strengths #------------------------------------------------------------------ my ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ) = $self->break_lines_inner_loop( $i_begin, $i_last_break, $imax, $last_break_strength, $line_count, $rbond_strength_to_go, $saw_good_break, ); # Now make any adjustments required by ternary breakpoint rules if ( @{$rcolon_list} ) { my $i_next_nonblank = $inext_to_go[$i_lowest]; #------------------------------------------------------- # ?/: rule 1 : if a break here will separate a '?' on this # line from its closing ':', then break at the '?' instead. # But do not break a sequential chain of ?/: statements #------------------------------------------------------- if ( !$is_colon_chain ) { foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) { next unless ( $tokens_to_go[$i] eq '?' ); # do not break if statement is broken by side comment next if ( $tokens_to_go[$max_index_to_go] eq '#' && terminal_type_i( 0, $max_index_to_go ) !~ /^[\;\}]$/ ); # no break needed if matching : is also on the line next if ( defined( $mate_index_to_go[$i] ) && $mate_index_to_go[$i] <= $i_next_nonblank ); $i_lowest = $i; if ( $want_break_before{'?'} ) { $i_lowest-- } $i_next_nonblank = $inext_to_go[$i_lowest]; last; } } my $next_nonblank_type = $types_to_go[$i_next_nonblank]; #------------------------------------------------------------- # ?/: rule 2 : if we break at a '?', then break at its ':' # # Note: this rule is also in sub break_lists to handle a break # at the start and end of a line (in case breaks are dictated # by side comments). #------------------------------------------------------------- if ( $next_nonblank_type eq '?' ) { $self->set_closing_breakpoint($i_next_nonblank); } elsif ( $types_to_go[$i_lowest] eq '?' ) { $self->set_closing_breakpoint($i_lowest); } else { ## ok } #-------------------------------------------------------- # ?/: rule 3 : if we break at a ':' then we save # its location for further work below. We may need to go # back and break at its '?'. #-------------------------------------------------------- if ( $next_nonblank_type eq ':' ) { push @i_colon_breaks, $i_next_nonblank; } elsif ( $types_to_go[$i_lowest] eq ':' ) { push @i_colon_breaks, $i_lowest; } else { ## ok } # here we should set breaks for all '?'/':' pairs which are # separated by this line } # guard against infinite loop (should never happen) if ( $i_lowest <= $i_last_break ) { DEVEL_MODE && Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n"); $i_lowest = $imax; } DEBUG_BREAK_LINES && print {*STDOUT} "BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n"; $line_count++; # save this line segment, after trimming blanks at the ends push( @i_first, ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin ); push( @i_last, ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest ); # set a forced breakpoint at a container opening, if necessary, to # signal a break at a closing container. Excepting '(' for now. if ( ( $tokens_to_go[$i_lowest] eq '{' || $tokens_to_go[$i_lowest] eq '[' ) && !$forced_breakpoint_to_go[$i_lowest] ) { $self->set_closing_breakpoint($i_lowest); } # get ready to find the next breakpoint $last_break_strength = $lowest_strength; $i_last_break = $i_lowest; $i_begin = $i_lowest + 1; # skip past a blank if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) { $i_begin++; } } #------------------------------------------------- # END of main loop to set continuation breakpoints #------------------------------------------------- #----------------------------------------------------------- # ?/: rule 4 -- if we broke at a ':', then break at # corresponding '?' unless this is a chain of ?: expressions #----------------------------------------------------------- if (@i_colon_breaks) { my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 ); if ( !$is_chain ) { $self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last ); } } return ( \@i_first, \@i_last, $rbond_strength_to_go ); } ## end sub break_long_lines # small bond strength numbers to help break ties use constant TINY_BIAS => 0.0001; use constant MAX_BIAS => 0.001; sub break_lines_inner_loop { #----------------------------------------------------------------- # Find the best next breakpoint in index range ($i_begin .. $imax) # which, if possible, does not exceed the maximum line length. #----------------------------------------------------------------- my ( $self, # $i_begin, $i_last_break, $imax, $last_break_strength, $line_count, $rbond_strength_to_go, $saw_good_break, ) = @_; # Given: # $i_begin = first index of range # $i_last_break = index of previous break # $imax = last index of range # $last_break_strength = bond strength of last break # $line_count = number of output lines so far # $rbond_strength_to_go = ref to array of bond strengths # $saw_good_break = true if old line had a good breakpoint # Returns: # $i_lowest = index of best breakpoint # $lowest_strength = 'bond strength' at best breakpoint # $leading_alignment_type = special token type after break # $Msg = string of debug info my $Msg = EMPTY_STRING; my $strength = NO_BREAK; my $i_test = $i_begin - 1; my $i_lowest = -1; my $starting_sum = $summed_lengths_to_go[$i_begin]; my $lowest_strength = NO_BREAK; my $leading_alignment_type = EMPTY_STRING; my $leading_spaces = leading_spaces_to_go($i_begin); my $maximum_line_length = $maximum_line_length_at_level[ $levels_to_go[$i_begin] ]; DEBUG_BREAK_LINES && do { $Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n"; }; # Do not separate an isolated bare word from an opening paren. # Alternate Fix #2 for issue b1299. This waits as long as possible # to make the decision. # Note for fix #c250: to keep line breaks unchanged under -extrude when # switching from 'i' to 'S' for subs, we would have to also check 'S', i.e. # =~/^[Si]$/. But this was never necessary at a sub signature, so we leave # it alone and allow the new version to be different for --extrude. For a # test file run perl527/signatures.t with --extrude. if ( $types_to_go[$i_begin] eq 'i' && substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ ) { my $i_next_nonblank = $inext_to_go[$i_begin]; if ( $tokens_to_go[$i_next_nonblank] eq '(' ) { $rbond_strength_to_go->[$i_begin] = NO_BREAK; } } # Avoid a break which would strand a single punctuation # token. For example, we do not want to strand a leading # '.' which is followed by a long quoted string. # But note that we do want to do this with -extrude (l=1) # so please test any changes to this code on -extrude. if ( ( $i_begin < $imax ) && ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] ) && !$forced_breakpoint_to_go[$i_begin] && !( # Allow break after a closing eval brace. This is an # approximate way to simulate a forced breakpoint made in # Section B below. No differences have been found, but if # necessary the full logic of Section B could be used here # (see c165). $tokens_to_go[$i_begin] eq '}' && $block_type_to_go[$i_begin] && $block_type_to_go[$i_begin] eq 'eval' ) && ( ( $leading_spaces + $summed_lengths_to_go[ $i_begin + 1 ] - $starting_sum ) < $maximum_line_length ) ) { $i_test = min( $imax, $inext_to_go[$i_begin] ) - 1; DEBUG_BREAK_LINES && do { $Msg .= " :skip ahead at i=$i_test"; }; } #------------------------------------------------------- # Begin INNER_LOOP over the indexes in the _to_go arrays #------------------------------------------------------- while ( ++$i_test <= $imax ) { my $type = $types_to_go[$i_test]; my $token = $tokens_to_go[$i_test]; my $i_next_nonblank = $inext_to_go[$i_test]; my $next_nonblank_type = $types_to_go[$i_next_nonblank]; my $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; #--------------------------------------------------------------- # Section A: Get token-token strength and handle any adjustments #--------------------------------------------------------------- # adjustments to the previous bond strength may have been made, and # we must keep the bond strength of a token and its following blank # the same; my $last_strength = $strength; $strength = $rbond_strength_to_go->[$i_test]; if ( $type eq 'b' ) { $strength = $last_strength } # reduce strength a bit to break ties at an old comma breakpoint ... if ( $old_breakpoint_to_go[$i_test] # Patch: limited to just commas to avoid blinking states && $type eq ',' # which is a 'good' breakpoint, meaning ... # we don't want to break before it && !$want_break_before{$type} # and either we want to break before the next token # or the next token is not short (i.e. not a '*', '/' etc.) && $i_next_nonblank <= $imax && ( $want_break_before{$next_nonblank_type} || $token_lengths_to_go[$i_next_nonblank] > 2 || $next_nonblank_type eq ',' || $is_opening_type{$next_nonblank_type} ) ) { $strength -= TINY_BIAS; DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" }; } # otherwise increase strength a bit if this token would be at the # maximum line length. This is necessary to avoid blinking # in the above example when the -iob flag is added. else { my $len = $leading_spaces + $summed_lengths_to_go[ $i_test + 1 ] - $starting_sum; if ( $len >= $maximum_line_length ) { $strength += TINY_BIAS; DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" }; } } #------------------------------------- # Section B: Handle forced breakpoints #------------------------------------- my $must_break; # Force an immediate break at certain operators # with lower level than the start of the line, # unless we've already seen a better break. # # Note on an issue with a preceding '?' : # There may be a break at a previous ? if the line is long. Because # of this we do not want to force a break if there is a previous ? on # this line. For now the best way to do this is to not break if we # have seen a lower strength point, which is probably a ?. # # Example of unwanted breaks we are avoiding at a '.' following a ? # from pod2html using perltidy -gnu: # ) # ? "\n<A NAME=\"" # . $value # . "\">\n$text</A>\n" # : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n"; if ( ( $strength <= $lowest_strength ) && ( $nesting_depth_to_go[$i_begin] > $nesting_depth_to_go[$i_next_nonblank] ) && ( $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/ || ( $next_nonblank_type eq 'k' ## /^(and|or)$/ # note: includes 'xor' now && $is_and_or{$next_nonblank_token} ) ) ) { $self->set_forced_breakpoint($i_next_nonblank); DEBUG_BREAK_LINES && do { $Msg .= " :Forced break at i=$i_next_nonblank" }; } if ( # Try to put a break where requested by break_lists $forced_breakpoint_to_go[$i_test] # break between ) { in a continued line so that the '{' can # be outdented # See similar logic in break_lists which catches instances # where a line is just something like ') {'. We have to # be careful because the corresponding block keyword might # not be on the first line, such as 'for' here: # # eval { # for ("a") { # for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ } # } # }; # || ( $line_count && ( $token eq ')' ) && ( $next_nonblank_type eq '{' ) && ($next_nonblank_block_type) && ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] ) # RT #104427: Dont break before opening sub brace because # sub block breaks handled at higher level, unless # it looks like the preceding list is long and broken && !( ( $next_nonblank_block_type =~ /$SUB_PATTERN/ || $matches_ASUB{$next_nonblank_block_type} ) && ( $nesting_depth_to_go[$i_begin] == $nesting_depth_to_go[$i_next_nonblank] ) ) && !$rOpts_opening_brace_always_on_right ) # There is an implied forced break at a terminal opening brace || ( ( $type eq '{' ) && ( $i_test == $imax ) ) ) { # Forced breakpoints must sometimes be overridden, for example # because of a side comment causing a NO_BREAK. It is easier # to catch this here than when they are set. if ( $strength < NO_BREAK - 1 ) { $strength = $lowest_strength - TINY_BIAS; $must_break = 1; DEBUG_BREAK_LINES && do { $Msg .= " :set must_break at i=$i_next_nonblank" }; } } # quit if a break here would put a good terminal token on # the next line and we already have a possible break if ( ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) && !$must_break && ( ( $leading_spaces + $summed_lengths_to_go[ $i_next_nonblank + 1 ] - $starting_sum ) > $maximum_line_length ) ) { if ( $i_lowest >= 0 ) { DEBUG_BREAK_LINES && do { $Msg .= " :quit at good terminal='$next_nonblank_type'"; }; last; } } #------------------------------------------------------------ # Section C: Look for the lowest bond strength between tokens #------------------------------------------------------------ if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) { # break at previous best break if it would have produced # a leading alignment of certain common tokens, and it # is different from the latest candidate break if ($leading_alignment_type) { DEBUG_BREAK_LINES && do { $Msg .= " :last at leading_alignment='$leading_alignment_type'"; }; last; } # Force at least one breakpoint if old code had good # break It is only called if a breakpoint is required or # desired. This will probably need some adjustments # over time. A goal is to try to be sure that, if a new # side comment is introduced into formatted text, then # the same breakpoints will occur. scbreak.t if ( $i_test == $imax # we are at the end && !$forced_breakpoint_count && $saw_good_break # old line had good break && $type =~ /^[#;\{]$/ # and this line ends in # ';' or side comment && $i_last_break < 0 # and we haven't made a break && $i_lowest >= 0 # and we saw a possible break && $i_lowest < $imax - 1 # (but not just before this ;) && $strength - $lowest_strength < 0.5 * WEAK # and it's good ) { DEBUG_BREAK_LINES && do { $Msg .= " :last at good old break\n"; }; last; } # Do not skip past an important break point in a short final # segment. For example, without this check we would miss the # break at the final / in the following code: # # $depth_stop = # ( $tau * $mass_pellet * $q_0 * # ( 1. - exp( -$t_stop / $tau ) ) - # 4. * $pi * $factor * $k_ice * # ( $t_melt - $t_ice ) * # $r_pellet * # $t_stop ) / # ( $rho_ice * $Qs * $pi * $r_pellet**2 ); # if ( $line_count > 2 && $i_lowest >= 0 # and we saw a possible break && $i_lowest < $i_test && $i_test > $imax - 2 && $nesting_depth_to_go[$i_begin] > $nesting_depth_to_go[$i_lowest] && $lowest_strength < $last_break_strength - .5 * WEAK ) { # Make this break for math operators for now my $ir = $inext_to_go[$i_lowest]; my $il = iprev_to_go($ir); if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/ || $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ ) { DEBUG_BREAK_LINES && do { $Msg .= " :last-noskip_short"; }; last; } } # Update the minimum bond strength location $lowest_strength = $strength; $i_lowest = $i_test; if ($must_break) { DEBUG_BREAK_LINES && do { $Msg .= " :last-must_break"; }; last; } # set flags to remember if a break here will produce a # leading alignment of certain common tokens if ( $line_count > 0 && $i_test < $imax && ( $lowest_strength - $last_break_strength <= MAX_BIAS ) ) { my $i_last_end = iprev_to_go($i_begin); my $tok_beg = $tokens_to_go[$i_begin]; my $type_beg = $types_to_go[$i_begin]; if ( # check for leading alignment of certain tokens ( $tok_beg eq $next_nonblank_token && $is_chain_operator{$tok_beg} && ( $type_beg eq 'k' || $type_beg eq $tok_beg ) && $nesting_depth_to_go[$i_begin] >= $nesting_depth_to_go[$i_next_nonblank] ) || ( $tokens_to_go[$i_last_end] eq $token && $is_chain_operator{$token} && ( $type eq 'k' || $type eq $token ) && $nesting_depth_to_go[$i_last_end] >= $nesting_depth_to_go[$i_test] ) ) { $leading_alignment_type = $next_nonblank_type; } } } #----------------------------------------------------------- # Section D: See if the maximum line length will be exceeded #----------------------------------------------------------- # Quit if there are no more tokens to test last if ( $i_test >= $imax ); # Keep going if we have not reached the limit my $excess = $leading_spaces + $summed_lengths_to_go[ $i_test + 2 ] - $starting_sum - $maximum_line_length; if ( $excess < 0 ) { next; } elsif ( $excess == 0 ) { # To prevent blinkers we will avoid leaving a token exactly at # the line length limit unless it is the last token or one of # several "good" types. # # The following code was a blinker with -pbp before this # modification: # $last_nonblank_token eq '(' # && $is_indirect_object_taker{ $paren_type # [$paren_depth] } # The issue causing the problem is that if the # term [$paren_depth] gets broken across a line then # the whitespace routine doesn't see both opening and closing # brackets and will format like '[ $paren_depth ]'. This # leads to an oscillation in length depending if we break # before the closing bracket or not. if ( $i_test + 1 < $imax && $next_nonblank_type ne ',' && !$is_closing_type{$next_nonblank_type} ) { # too long DEBUG_BREAK_LINES && do { $Msg .= " :too_long"; } } else { next; } } else { # too long } # a break here makes the line too long ... DEBUG_BREAK_LINES && do { my $ltok = $token; my $rtok = $next_nonblank_token ? $next_nonblank_token : EMPTY_STRING; my $i_testp2 = $i_test + 2; if ( $i_testp2 > $max_index_to_go + 1 ) { $i_testp2 = $max_index_to_go + 1; } if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) } if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) } print {*STDOUT} "BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n"; }; # Exception: allow one extra terminal token after exceeding line length # if it would strand this token. if ( $i_lowest == $i_test && $token_lengths_to_go[$i_test] > 1 && ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' ) && $rOpts_fuzzy_line_length ) { DEBUG_BREAK_LINES && do { $Msg .= " :do_not_strand next='$next_nonblank_type'"; }; next; } # Stop if here if we have a solution and the line will be too long if ( $i_lowest >= 0 ) { DEBUG_BREAK_LINES && do { $Msg .= " :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax"; }; last; } } #----------------------------------------------------- # End INNER_LOOP over the indexes in the _to_go arrays #----------------------------------------------------- # Be sure we return an index in the range ($ibegin .. $imax). # We will break at imax if no other break was found. if ( $i_lowest < 0 ) { $i_lowest = $imax } return ( $i_lowest, $lowest_strength, $leading_alignment_type, $Msg ); } ## end sub break_lines_inner_loop sub do_colon_breaks { my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_; # using a simple method for deciding if we are in a ?/: chain -- # this is a chain if it has multiple ?/: pairs all in order; # otherwise not. # Note that if line starts in a ':' we count that above as a break my @insert_list = (); foreach ( @{$ri_colon_breaks} ) { my $i_question = $mate_index_to_go[$_]; if ( defined($i_question) ) { if ( $want_break_before{'?'} ) { $i_question = iprev_to_go($i_question); } if ( $i_question >= 0 ) { push @insert_list, $i_question; } } $self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last ); } return; } ## end sub do_colon_breaks ########################################### # CODE SECTION 11: Code to break long lists ########################################### { ## begin closure break_lists # These routines and variables are involved in finding good # places to break long lists. use constant DEBUG_BREAK_LISTS => 0; my ( $block_type, $current_depth, $depth, $i, $i_last_colon, $i_line_end, $i_line_start, $i_last_nonblank_token, $last_nonblank_block_type, $last_nonblank_token, $last_nonblank_type, $last_old_breakpoint_count, $minimum_depth, $next_nonblank_block_type, $next_nonblank_token, $next_nonblank_type, $old_breakpoint_count, $starting_breakpoint_count, $starting_depth, $token, $type, $type_sequence, ); my ( @breakpoint_stack, @breakpoint_undo_stack, @comma_index, @container_type, @identifier_count_stack, @index_before_arrow, @interrupted_list, @item_count_stack, @last_comma_index, @last_dot_index, @last_nonblank_type, @old_breakpoint_count_stack, @opening_structure_index_stack, @rfor_semicolon_list, @has_old_logical_breakpoints, @rand_or_list, @i_equals, @override_cab3, @type_sequence_stack, ); # these arrays must retain values between calls my ( @has_broken_sublist, @dont_align, @want_comma_break ); my $length_tol; my $lp_tol_boost; sub initialize_break_lists { @dont_align = (); @has_broken_sublist = (); @want_comma_break = (); #--------------------------------------------------- # Set tolerances to prevent formatting instabilities #--------------------------------------------------- # Define tolerances to use when checking if closed # containers will fit on one line. This is necessary to avoid # formatting instability. The basic tolerance is based on the # following: # - Always allow for at least one extra space after a closing token so # that we do not strand a comma or semicolon. (oneline.t). # - Use an increased line length tolerance when -ci > -i to avoid # blinking states (case b923 and others). $length_tol = 1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns ); # In addition, it may be necessary to use a few extra tolerance spaces # when -lp is used and/or when -xci is used. The history of this # so far is as follows: # FIX1: At least 3 characters were been found to be required for -lp # to fixes cases b1059 b1063 b1117. # FIX2: Further testing showed that we need a total of 3 extra spaces # when -lp is set for non-lists, and at least 2 spaces when -lp and # -xci are set. # Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144 # b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164 # b1165 # FIX3: To fix cases b1169 b1170 b1171, an update was made in sub # 'find_token_starting_list' to go back before an initial blank space. # This fixed these three cases, and allowed the tolerances to be # reduced to continue to fix all other known cases of instability. # This gives the current tolerance formulation. $lp_tol_boost = 0; if ($rOpts_line_up_parentheses) { # boost tol for combination -lp -xci if ($rOpts_extended_continuation_indentation) { $lp_tol_boost = 2; } # boost tol for combination -lp and any -vtc > 0, but only for # non-list containers else { foreach ( keys %closing_vertical_tightness ) { next unless ( $closing_vertical_tightness{$_} ); $lp_tol_boost = 1; # Fixes B1193; last; } } } # Define a level where list formatting becomes highly stressed and # needs to be simplified. Introduced for case b1262. # $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2); # This is now '$high_stress_level'. return; } ## end sub initialize_break_lists # routine to define essential variables when we go 'up' to # a new depth sub check_for_new_minimum_depth { my ( $self, $depth_t, $seqno ) = @_; if ( $depth_t < $minimum_depth ) { $minimum_depth = $depth_t; # these arrays need not retain values between calls my $old_seqno = $type_sequence_stack[$depth_t]; my $changed_seqno = !defined($old_seqno) || $old_seqno != $seqno; $type_sequence_stack[$depth_t] = $seqno; $override_cab3[$depth_t] = undef; if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) { $override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno}; } $breakpoint_stack[$depth_t] = $starting_breakpoint_count; $container_type[$depth_t] = EMPTY_STRING; $identifier_count_stack[$depth_t] = 0; $index_before_arrow[$depth_t] = -1; $interrupted_list[$depth_t] = 1; $item_count_stack[$depth_t] = 0; $last_nonblank_type[$depth_t] = EMPTY_STRING; $opening_structure_index_stack[$depth_t] = -1; $breakpoint_undo_stack[$depth_t] = undef; $comma_index[$depth_t] = undef; $last_comma_index[$depth_t] = undef; $last_dot_index[$depth_t] = undef; $old_breakpoint_count_stack[$depth_t] = undef; $has_old_logical_breakpoints[$depth_t] = 0; $rand_or_list[$depth_t] = []; $rfor_semicolon_list[$depth_t] = []; $i_equals[$depth_t] = -1; # these arrays must retain values between calls if ( $changed_seqno || !defined( $has_broken_sublist[$depth_t] ) ) { $dont_align[$depth_t] = 0; $has_broken_sublist[$depth_t] = 0; $want_comma_break[$depth_t] = 0; } } return; } ## end sub check_for_new_minimum_depth # routine to decide which commas to break at within a container; # returns: # $bp_count = number of comma breakpoints set # $do_not_break_apart = a flag indicating if container need not # be broken open sub set_comma_breakpoints { my ( $self, $dd, $rbond_strength_bias ) = @_; my $bp_count = 0; my $do_not_break_apart = 0; # anything to do? if ( $item_count_stack[$dd] ) { # Do not break a list unless there are some non-line-ending commas. # This avoids getting different results with only non-essential # commas, and fixes b1192. my $seqno = $type_sequence_stack[$dd]; my $real_comma_count = $seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1; # handle commas not in containers... if ( $dont_align[$dd] ) { $self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias ); } # handle commas within containers... elsif ($real_comma_count) { my $fbc = $forced_breakpoint_count; # always open comma lists not preceded by keywords, # barewords, identifiers (that is, anything that doesn't # look like a function call) # c250: added new sub identifier type 'S' my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiUS]$/; $self->table_maker( { depth => $dd, i_opening_paren => $opening_structure_index_stack[$dd], i_closing_paren => $i, item_count => $item_count_stack[$dd], identifier_count => $identifier_count_stack[$dd], rcomma_index => $comma_index[$dd], next_nonblank_type => $next_nonblank_type, list_type => $container_type[$dd], interrupted => $interrupted_list[$dd], rdo_not_break_apart => \$do_not_break_apart, must_break_open => $must_break_open, has_broken_sublist => $has_broken_sublist[$dd], } ); $bp_count = $forced_breakpoint_count - $fbc; $do_not_break_apart = 0 if $must_break_open; } else { ## no real commas, nothing to do } } return ( $bp_count, $do_not_break_apart ); } ## end sub set_comma_breakpoints # These types are excluded at breakpoints to prevent blinking # Switched from excluded to included as part of fix for b1214 my %is_uncontained_comma_break_included_type; BEGIN { my @q = qw< k R } ) ] Y Z U w i q Q . = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=>; @is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q); } ## end BEGIN sub do_uncontained_comma_breaks { # Handle commas not in containers... # This is a catch-all routine for commas that we # don't know what to do with because the don't fall # within containers. We will bias the bond strength # to break at commas which ended lines in the input # file. This usually works better than just trying # to put as many items on a line as possible. A # downside is that if the input file is garbage it # won't work very well. However, the user can always # prevent following the old breakpoints with the # -iob flag. my ( $self, $dd, $rbond_strength_bias ) = @_; # Check added for issue c131; an error here would be due to an # error initializing @comma_index when entering depth $dd. if (DEVEL_MODE) { foreach my $ii ( @{ $comma_index[$dd] } ) { if ( $ii < 0 || $ii > $max_index_to_go ) { my $KK = $K_to_go[0]; my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_]; Fault(<<EOM); Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go EOM } } } my $bias = -.01; my $old_comma_break_count = 0; foreach my $ii ( @{ $comma_index[$dd] } ) { if ( $old_breakpoint_to_go[$ii] ) { $old_comma_break_count++; # Store the bias info for use by sub set_bond_strength push @{$rbond_strength_bias}, [ $ii, $bias ]; # reduce bias magnitude to force breaks in order $bias *= 0.99; } } # Also put a break before the first comma if # (1) there was a break there in the input, and # (2) there was exactly one old break before the first comma break # (3) OLD: there are multiple old comma breaks # (3) NEW: there are one or more old comma breaks (see return example) # (4) the first comma is at the starting level ... # ... fixes cases b064 b065 b068 b210 b747 # (5) the batch does not start with a ci>0 [ignore a ci change by -xci] # ... fixes b1220. If ci>0 we are in the middle of a snippet, # maybe because -boc has been forcing out previous lines. # For example, we will follow the user and break after # 'print' in this snippet: # print # "conformability (Not the same dimension)\n", # "\t", $have, " is ", text_unit($hu), "\n", # "\t", $want, " is ", text_unit($wu), "\n", # ; # # Another example, just one comma, where we will break after # the return: # return # $x * cos($a) - $y * sin($a), # $x * sin($a) + $y * cos($a); # Breaking a print statement: # print SAVEOUT # ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "", # ( $? & 128 ) ? " -- core dumped" : "", "\n"; # # But we will not force a break after the opening paren here # (causes a blinker): # $heap->{stream}->set_output_filter( # poe::filter::reference->new('myotherfreezer') ), # ; # my $i_first_comma = $comma_index[$dd]->[0]; my $level_comma = $levels_to_go[$i_first_comma]; my $ci_start = $ci_levels_to_go[0]; # Here we want to use the value of ci before any -xci adjustment if ( $ci_start && $rOpts_extended_continuation_indentation ) { my $K0 = $K_to_go[0]; if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 } } if ( !$ci_start && $old_breakpoint_to_go[$i_first_comma] && $level_comma == $levels_to_go[0] ) { my $ibreak = -1; my $obp_count = 0; foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) { if ( $old_breakpoint_to_go[$ii] ) { $obp_count++; last if ( $obp_count > 1 ); $ibreak = $ii if ( $levels_to_go[$ii] == $level_comma ); } } # Changed rule from multiple old commas to just one here: if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 ) { my $ibreak_m = $ibreak; $ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' ); if ( $ibreak_m >= 0 ) { # In order to avoid blinkers we have to be fairly # restrictive: # OLD Rules: # Rule 1: Do not to break before an opening token # Rule 2: avoid breaking at ternary operators # (see b931, which is similar to the above print example) # Rule 3: Do not break at chain operators to fix case b1119 # - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/' # NEW Rule, replaced above rules after case b1214: # only break at one of the included types # Be sure to test any changes to these rules against runs # with -l=0 such as the 'bbvt' test (perltidyrc_colin) # series. my $type_m = $types_to_go[$ibreak_m]; # Switched from excluded to included for b1214. If necessary # the token could also be checked if type_m eq 'k' if ( $is_uncontained_comma_break_included_type{$type_m} ) { # Rule added to fix b1449: # Do not break before a '?' if -nbot is set # Otherwise, we may alternately arrive here and # set the break, or not, depending on the input. my $no_break; my $ibreak_p = $inext_to_go[$ibreak_m]; if ( !$rOpts_break_at_old_ternary_breakpoints && $ibreak_p <= $max_index_to_go ) { my $type_p = $types_to_go[$ibreak_p]; $no_break = $type_p eq '?'; } $self->set_forced_breakpoint($ibreak) if ( !$no_break ); } } } } return; } ## end sub do_uncontained_comma_breaks my %is_logical_container; my %quick_filter; BEGIN { my @q = qw# if elsif unless while and or err not && | || ? : ! #; @is_logical_container{@q} = (1) x scalar(@q); # This filter will allow most tokens to skip past a section of code %quick_filter = %is_assignment; @q = qw# => . ; < > ~ #; push @q, ','; push @q, 'f'; # added for ';' for issue c154 @quick_filter{@q} = (1) x scalar(@q); } ## end BEGIN sub set_for_semicolon_breakpoints { my ( $self, $dd ) = @_; # Set breakpoints for semicolons in C-style 'for' containers foreach ( @{ $rfor_semicolon_list[$dd] } ) { $self->set_forced_breakpoint($_); } return; } ## end sub set_for_semicolon_breakpoints sub set_logical_breakpoints { my ( $self, $dd ) = @_; # Set breakpoints at logical operators if ( $item_count_stack[$dd] == 0 && $is_logical_container{ $container_type[$dd] } || $has_old_logical_breakpoints[$dd] ) { # Look for breaks in this order: # 0 1 2 3 # or and || && foreach my $i ( 0 .. 3 ) { if ( $rand_or_list[$dd][$i] ) { foreach ( @{ $rand_or_list[$dd][$i] } ) { $self->set_forced_breakpoint($_); } # break at any 'if' and 'unless' too foreach ( @{ $rand_or_list[$dd][4] } ) { $self->set_forced_breakpoint($_); } $rand_or_list[$dd] = []; last; } } } return; } ## end sub set_logical_breakpoints sub is_unbreakable_container { # never break a container of one of these types # because bad things can happen (map1.t) my $dd = shift; return $is_sort_map_grep{ $container_type[$dd] }; } ## end sub is_unbreakable_container sub break_lists { my ( $self, $is_long_line, $rbond_strength_bias ) = @_; #-------------------------------------------------------------------- # This routine is called once per batch, if the batch is a list, to # set line breaks so that hierarchical structure can be displayed and # so that list items can be vertically aligned. The output of this # routine is stored in the array @forced_breakpoint_to_go, which is # used by sub 'break_long_lines' to set final breakpoints. This is # probably the most complex routine in perltidy, so I have # broken it into pieces and over-commented it. #-------------------------------------------------------------------- $starting_depth = $nesting_depth_to_go[0]; $block_type = SPACE; $current_depth = $starting_depth; $i = -1; $i_last_colon = -1; $i_line_end = -1; $i_line_start = -1; $last_nonblank_token = ';'; $last_nonblank_type = ';'; $last_nonblank_block_type = SPACE; $last_old_breakpoint_count = 0; $minimum_depth = $current_depth + 1; # forces update in check below $old_breakpoint_count = 0; $starting_breakpoint_count = $forced_breakpoint_count; $token = ';'; $type = ';'; $type_sequence = EMPTY_STRING; my $total_depth_variation = 0; my $i_old_assignment_break; my $depth_last = $starting_depth; my $comma_follows_last_closing_token; $self->check_for_new_minimum_depth( $current_depth, $parent_seqno_to_go[0] ) if ( $current_depth < $minimum_depth ); my $i_want_previous_break = -1; my $saw_good_breakpoint; #---------------------------------------- # Main loop over all tokens in this batch #---------------------------------------- while ( ++$i <= $max_index_to_go ) { if ( $type ne 'b' ) { $i_last_nonblank_token = $i - 1; $last_nonblank_type = $type; $last_nonblank_token = $token; $last_nonblank_block_type = $block_type; } $type = $types_to_go[$i]; $block_type = $block_type_to_go[$i]; $token = $tokens_to_go[$i]; $type_sequence = $type_sequence_to_go[$i]; my $i_next_nonblank = $inext_to_go[$i]; $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; #------------------------------------------- # Loop Section A: Look for special breakpoints... #------------------------------------------- # set break if flag was set if ( $i_want_previous_break >= 0 ) { $self->set_forced_breakpoint($i_want_previous_break); $i_want_previous_break = -1; } $last_old_breakpoint_count = $old_breakpoint_count; # Check for a good old breakpoint .. if ( $old_breakpoint_to_go[$i] ) { ( $i_want_previous_break, $i_old_assignment_break ) = $self->examine_old_breakpoint( $i_next_nonblank, $i_want_previous_break, $i_old_assignment_break ); } next if ( $type eq 'b' ); $depth = $nesting_depth_to_go[ $i + 1 ]; $total_depth_variation += abs( $depth - $depth_last ); $depth_last = $depth; # safety check - be sure we always break after a comment # Shouldn't happen .. an error here probably means that the # nobreak flag did not get turned off correctly during # formatting. if ( $type eq '#' ) { if ( $i != $max_index_to_go ) { if (DEVEL_MODE) { Fault(<<EOM); Non-fatal program bug: backup logic required to break after a comment EOM } $nobreak_to_go[$i] = 0; $self->set_forced_breakpoint($i); } ## end if ( $i != $max_index_to_go) } ## end if ( $type eq '#' ) # Force breakpoints at certain tokens in long lines. # Note that such breakpoints will be undone later if these tokens # are fully contained within parens on a line. if ( # break before a keyword within a line $type eq 'k' && $i > 0 # if one of these keywords: && $is_if_unless_while_until_for_foreach{$token} # but do not break at something like '1 while' && ( $last_nonblank_type ne 'n' || $i > 2 ) # and let keywords follow a closing 'do' brace && ( !$last_nonblank_block_type || $last_nonblank_block_type ne 'do' ) && ( $is_long_line # or container is broken (by side-comment, etc) || ( $next_nonblank_token eq '(' && ( !defined( $mate_index_to_go[$i_next_nonblank] ) || $mate_index_to_go[$i_next_nonblank] < $i ) ) ) ) { $self->set_forced_breakpoint( $i - 1 ); } # remember locations of '||' and '&&' for possible breaks if we # decide this is a long logical expression. if ( $type eq '||' ) { push @{ $rand_or_list[$depth][2] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } elsif ( $type eq '&&' ) { push @{ $rand_or_list[$depth][3] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } elsif ( $type eq 'f' ) { push @{ $rfor_semicolon_list[$depth] }, $i; } elsif ( $type eq 'k' ) { if ( $token eq 'and' ) { push @{ $rand_or_list[$depth][1] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } # break immediately at 'or's which are probably not in a logical # block -- but we will break in logical breaks below so that # they do not add to the forced_breakpoint_count elsif ( $token eq 'or' ) { push @{ $rand_or_list[$depth][0] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); if ( $is_logical_container{ $container_type[$depth] } ) { } else { if ($is_long_line) { $self->set_forced_breakpoint($i) } elsif ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ) { $saw_good_breakpoint = 1; } else { ## not a good break } } } elsif ( $token eq 'if' || $token eq 'unless' ) { push @{ $rand_or_list[$depth][4] }, $i; if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ) { $self->set_forced_breakpoint($i); } } else { ## not one of: 'and' 'or' 'if' 'unless' } } elsif ( $is_assignment{$type} ) { $i_equals[$depth] = $i; } else { ## not a good breakpoint type } #----------------------------------------- # Loop Section B: Handle a sequenced token #----------------------------------------- if ($type_sequence) { $self->break_lists_type_sequence; } #------------------------------------------ # Loop Section C: Handle Increasing Depth.. #------------------------------------------ # hardened against bad input syntax: depth jump must be 1 and type # must be opening..fixes c102 if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) { $self->break_lists_increasing_depth(); } #------------------------------------------ # Loop Section D: Handle Decreasing Depth.. #------------------------------------------ # hardened against bad input syntax: depth jump must be 1 and type # must be closing .. fixes c102 elsif ( $depth == $current_depth - 1 && $is_closing_type{$type} ) { $self->break_lists_decreasing_depth(); $comma_follows_last_closing_token = $next_nonblank_type eq ',' || $next_nonblank_type eq '=>'; } else { ## not a depth change } #---------------------------------- # Loop Section E: Handle this token #---------------------------------- $current_depth = $depth; # most token types can skip the rest of this loop next if ( !$quick_filter{$type} ); # Turn off comma alignment if we are sure that this is not a list # environment. To be safe, we will do this if we see certain # non-list tokens, such as ';', '=', and also the environment is # not a list. ## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type} if ( $is_non_list_type{$type} ) { if ( !$self->is_in_list_by_i($i) ) { $dont_align[$depth] = 1; $want_comma_break[$depth] = 0; $index_before_arrow[$depth] = -1; # no special comma breaks in C-style 'for' terms (c154) if ( $type eq 'f' ) { $last_comma_index[$depth] = undef } } } # handle any commas elsif ( $type eq ',' ) { $self->study_comma($comma_follows_last_closing_token); } # handle comma-arrow elsif ( $type eq '=>' ) { next if ( $last_nonblank_type eq '=>' ); next if $rOpts_break_at_old_comma_breakpoints; next if ( $rOpts_comma_arrow_breakpoints == 3 && !defined( $override_cab3[$depth] ) ); $want_comma_break[$depth] = 1; $index_before_arrow[$depth] = $i_last_nonblank_token; next; } elsif ( $type eq '.' ) { $last_dot_index[$depth] = $i; } else { # error : no code to handle a type in %quick_filter DEVEL_MODE && Fault(<<EOM); Missing code to handle token type '$type' which is in the quick_filter EOM } } ## end while ( ++$i <= $max_index_to_go) #------------------------------------------- # END of loop over all tokens in this batch # Now set breaks for any unfinished lists .. #------------------------------------------- foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) { $interrupted_list[$dd] = 1; $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth ); $self->set_comma_breakpoints( $dd, $rbond_strength_bias ) if ( $item_count_stack[$dd] ); $self->set_logical_breakpoints($dd) if ( $has_old_logical_breakpoints[$dd] ); $self->set_for_semicolon_breakpoints($dd); # break open container... my $i_opening = $opening_structure_index_stack[$dd]; if ( defined($i_opening) && $i_opening >= 0 ) { my $no_break = ( is_unbreakable_container($dd) # Avoid a break which would place an isolated ' or " # on a line || ( $type eq 'Q' && $i_opening >= $max_index_to_go - 2 && ( $token eq "'" || $token eq '"' ) ) ); $self->set_forced_breakpoint($i_opening) if ( !$no_break ); } } ## end for ( my $dd = $current_depth...) #---------------------------------------- # Return the flag '$saw_good_breakpoint'. #---------------------------------------- # This indicates if the input file had some good breakpoints. This # flag will be used to force a break in a line shorter than the # allowed line length. if ( $has_old_logical_breakpoints[$current_depth] ) { $saw_good_breakpoint = 1; } # A complex line with one break at an = has a good breakpoint. # This is not complex ($total_depth_variation=0): # $res1 # = 10; # # This is complex ($total_depth_variation=6): # $res2 = # (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert')); # The check ($i_old_.. < $max_index_to_go) was added to fix b1333 elsif ($i_old_assignment_break && $total_depth_variation > 4 && $old_breakpoint_count == 1 && $i_old_assignment_break < $max_index_to_go ) { $saw_good_breakpoint = 1; } else { ## not a good breakpoint } return $saw_good_breakpoint; } ## end sub break_lists sub study_comma { # study and store info for a list comma my ( $self, $comma_follows_last_closing_token ) = @_; $last_dot_index[$depth] = undef; $last_comma_index[$depth] = $i; # break here if this comma follows a '=>' # but not if there is a side comment after the comma if ( $want_comma_break[$depth] ) { if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) { if ($rOpts_comma_arrow_breakpoints) { $want_comma_break[$depth] = 0; return; } } $self->set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); # break before the previous token if it looks safe # Example of something that we will not try to break before: # DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt}, # Also we don't want to break at a binary operator (like +): # $c->createOval( # $x + $R, $y + # $R => $x - $R, # $y - $R, -fill => 'black', # ); my $ibreak = $index_before_arrow[$depth] - 1; if ( $ibreak > 0 && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ ) { if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- } if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- } if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) { # don't break before a comma, as in the following: # ( LONGER_THAN,=> 1, # EIGHTY_CHARACTERS,=> 2, # CAUSES_FORMATTING,=> 3, # LIKE_THIS,=> 4, # ); # This example is for -tso but should be general rule if ( $tokens_to_go[ $ibreak + 1 ] ne '->' && $tokens_to_go[ $ibreak + 1 ] ne ',' ) { $self->set_forced_breakpoint($ibreak); } } } $want_comma_break[$depth] = 0; $index_before_arrow[$depth] = -1; # handle list which mixes '=>'s and ','s: # treat any list items so far as an interrupted list $interrupted_list[$depth] = 1; return; } # Break after all commas above starting depth... # But only if the last closing token was followed by a comma, # to avoid breaking a list operator (issue c119) if ( $depth < $starting_depth && $comma_follows_last_closing_token && !$dont_align[$depth] ) { $self->set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' ); return; } # add this comma to the list.. my $item_count = $item_count_stack[$depth]; if ( $item_count == 0 ) { # but do not form a list with no opening structure # for example: # open INFILE_COPY, ">$input_file_copy" # or die ("very long message"); if ( ( $opening_structure_index_stack[$depth] < 0 ) && $self->is_in_block_by_i($i) ) { $dont_align[$depth] = 1; } } $comma_index[$depth][$item_count] = $i; ++$item_count_stack[$depth]; if ( $last_nonblank_type =~ /^[iR\]]$/ ) { $identifier_count_stack[$depth]++; } return; } ## end sub study_comma my %poor_types; my %poor_keywords; my %poor_next_types; my %poor_next_keywords; BEGIN { # Setup filters for detecting very poor breaks to ignore. # b1097: old breaks after type 'L' and before 'R' are poor # b1450: old breaks at 'eq' and related operators are poor my @q = qw(== <= >= !=); @{poor_types}{@q} = (1) x scalar(@q); @{poor_next_types}{@q} = (1) x scalar(@q); $poor_types{'L'} = 1; $poor_next_types{'R'} = 1; @q = qw(eq ne le ge lt gt); @{poor_keywords}{@q} = (1) x scalar(@q); @{poor_next_keywords}{@q} = (1) x scalar(@q); } ## end BEGIN sub examine_old_breakpoint { my ( $self, $i_next_nonblank, $i_want_previous_break, $i_old_assignment_break ) = @_; # Look at an old breakpoint and set/update certain flags: # Given indexes of three tokens in this batch: # $i_next_nonblank - index of the next nonblank token # $i_want_previous_break - we want a break before this index # $i_old_assignment_break - the index of an '=' or equivalent # Update: # $old_breakpoint_count - a counter to increment unless poor break # Update and return: # $i_want_previous_break # $i_old_assignment_break #----------------------- # Filter out poor breaks #----------------------- # Just return if this is a poor break and pretend it does not exist. # Otherwise, poor breaks made under stress can cause instability. my $poor_break; if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} } else { $poor_break ||= $poor_types{$type} } if ( $next_nonblank_type eq 'k' ) { $poor_break ||= $poor_next_keywords{$next_nonblank_token}; } else { $poor_break ||= $poor_next_types{$next_nonblank_type} } # Also ignore any high stress level breaks; fixes b1395 $poor_break ||= $levels_to_go[$i] >= $high_stress_level; if ($poor_break) { goto RETURN } #-------------------------------------------- # Not a poor break, so continue to examine it #-------------------------------------------- $old_breakpoint_count++; $i_line_end = $i; $i_line_start = $i_next_nonblank; #--------------------------------------- # Do we want to break before this token? #--------------------------------------- # Break before certain keywords if user broke there and # this is a 'safe' break point. The idea is to retain # any preferred breaks for sequential list operations, # like a schwartzian transform. if ($rOpts_break_at_old_keyword_breakpoints) { if ( $next_nonblank_type eq 'k' && $is_keyword_returning_list{$next_nonblank_token} && ( $type =~ /^[=\)\]\}Riw]$/ || $type eq 'k' && $is_keyword_returning_list{$token} ) ) { # we actually have to set this break next time through # the loop because if we are at a closing token (such # as '}') which forms a one-line block, this break might # get undone. # But do not do this at an '=' if: # - the user wants breaks before an equals (b434 b903) # - or -naws is set (can be unstable, see b1354) my $skip = $type eq '=' && ( $want_break_before{$type} || !$rOpts_add_whitespace ); $i_want_previous_break = $i unless ($skip); } } # Break before attributes if user broke there if ($rOpts_break_at_old_attribute_breakpoints) { if ( $next_nonblank_type eq 'A' ) { $i_want_previous_break = $i; } } #--------------------------------- # Is this an old assignment break? #--------------------------------- if ( $is_assignment{$type} ) { $i_old_assignment_break = $i; } elsif ( $is_assignment{$next_nonblank_type} ) { $i_old_assignment_break = $i_next_nonblank; } else { ## not old assignment break } RETURN: return ( $i_want_previous_break, $i_old_assignment_break ); } ## end sub examine_old_breakpoint sub break_lists_type_sequence { my ($self) = @_; # We have encountered a sequenced token while setting list breakpoints # if closing type, one of } ) ] : if ( $is_closing_sequence_token{$token} ) { if ( $type eq ':' ) { $i_last_colon = $i; # retain break at a ':' line break if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_ternary_breakpoints && $levels_to_go[$i] < $high_stress_level ) { $self->set_forced_breakpoint($i); # Break at a previous '=', but only if it is before # the mating '?'. Mate_index test fixes b1287. my $ieq = $i_equals[$depth]; my $mix = $mate_index_to_go[$i]; if ( !defined($mix) ) { $mix = -1 } if ( $ieq > 0 && $ieq < $mix ) { $self->set_forced_breakpoint( $i_equals[$depth] ); $i_equals[$depth] = -1; } } } # handle any postponed closing breakpoints if ( has_postponed_breakpoint($type_sequence) ) { my $inc = ( $type eq ':' ) ? 0 : 1; if ( $i >= $inc ) { $self->set_forced_breakpoint( $i - $inc ); } } } # must be opening token, one of { ( [ ? else { # set breaks at ?/: if they will get separated (and are # not a ?/: chain), or if the '?' is at the end of the # line if ( $token eq '?' ) { my $i_colon = $mate_index_to_go[$i]; if ( !defined($i_colon) # the ':' is not in this batch || $i == 0 # this '?' is the first token of the line || $i == $max_index_to_go # or this '?' is the last token ) { # don't break if # this has a side comment, and # don't break at a '?' if preceded by ':' on # this line of previous ?/: pair on this line. # This is an attempt to preserve a chain of ?/: # expressions (elsif2.t). if ( ( $i_last_colon < 0 || $parent_seqno_to_go[$i_last_colon] != $parent_seqno_to_go[$i] ) && $tokens_to_go[$max_index_to_go] ne '#' ) { $self->set_forced_breakpoint($i); } $self->set_closing_breakpoint($i); } } # must be one of { ( [ else { # do requested -lp breaks at the OPENING token for BROKEN # blocks. NOTE: this can be done for both -lp and -xlp, # but only -xlp can really take advantage of this. So this # is currently restricted to -xlp to avoid excess changes to # existing -lp formatting. if ( $rOpts_extended_line_up_parentheses && !defined( $mate_index_to_go[$i] ) ) { my $lp_object = $self->[_rlp_object_by_seqno_]->{$type_sequence}; if ($lp_object) { my $K_begin_line = $lp_object->get_K_begin_line(); my $i_begin_line = $K_begin_line - $K_to_go[0]; $self->set_forced_lp_break( $i_begin_line, $i ); } } } } return; } ## end sub break_lists_type_sequence sub break_lists_increasing_depth { my ($self) = @_; #-------------------------------------------- # prepare for a new list when depth increases # token $i is a '(','{', or '[' #-------------------------------------------- #---------------------------------------------------------- # BEGIN initialize depth arrays # ... use the same order as sub check_for_new_minimum_depth #---------------------------------------------------------- $type_sequence_stack[$depth] = $type_sequence; $override_cab3[$depth] = undef; if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) { $override_cab3[$depth] = $self->[_roverride_cab3_]->{$type_sequence}; } $breakpoint_stack[$depth] = $forced_breakpoint_count; $container_type[$depth] = # k => && || ? : . $is_container_label_type{$last_nonblank_type} ? $last_nonblank_token : EMPTY_STRING; $identifier_count_stack[$depth] = 0; $index_before_arrow[$depth] = -1; $interrupted_list[$depth] = 0; $item_count_stack[$depth] = 0; $last_nonblank_type[$depth] = $last_nonblank_type; $opening_structure_index_stack[$depth] = $i; $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; $comma_index[$depth] = undef; $last_comma_index[$depth] = undef; $last_dot_index[$depth] = undef; $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; $has_old_logical_breakpoints[$depth] = 0; $rand_or_list[$depth] = []; $rfor_semicolon_list[$depth] = []; $i_equals[$depth] = -1; # if line ends here then signal closing token to break if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) { $self->set_closing_breakpoint($i); } # Not all lists of values should be vertically aligned.. $dont_align[$depth] = # code BLOCKS are handled at a higher level ##( $block_type ne EMPTY_STRING ) $block_type # certain paren lists || ( $type eq '(' ) && ( # it does not usually look good to align a list of # identifiers in a parameter list, as in: # my($var1, $var2, ...) # (This test should probably be refined, for now I'm just # testing for any keyword) ( $last_nonblank_type eq 'k' ) # a trailing '(' usually indicates a non-list || ( $next_nonblank_type eq '(' ) ); $has_broken_sublist[$depth] = 0; $want_comma_break[$depth] = 0; #---------------------------- # END initialize depth arrays #---------------------------- # patch to outdent opening brace of long if/for/.. # statements (like this one). See similar coding in # set_continuation breaks. We have also catch it here for # short line fragments which otherwise will not go through # break_long_lines. if ( $block_type # if we have the ')' but not its '(' in this batch.. && ( $last_nonblank_token eq ')' ) && !defined( $mate_index_to_go[$i_last_nonblank_token] ) # and user wants brace to left && !$rOpts_opening_brace_always_on_right && ( $type eq '{' ) # should be true && ( $token eq '{' ) # should be true ) { $self->set_forced_breakpoint( $i - 1 ); } return; } ## end sub break_lists_increasing_depth sub break_lists_decreasing_depth { my ( $self, $rbond_strength_bias ) = @_; # We have arrived at a closing container token in sub break_lists: # the token at index $i is one of these: ')','}', ']' # A number of important breakpoints for this container can now be set # based on the information that we have collected. This includes: # - breaks at commas to format tables # - breaks at certain logical operators and other good breakpoints # - breaks at opening and closing containers if needed by selected # formatting styles # These breaks are made by calling sub 'set_forced_breakpoint' $self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] ) if ( $depth < $minimum_depth ); # force all outer logical containers to break after we see on # old breakpoint $has_old_logical_breakpoints[$depth] ||= $has_old_logical_breakpoints[$current_depth]; # Patch to break between ') {' if the paren list is broken. # There is similar logic in break_long_lines for # non-broken lists. if ( $token eq ')' && $next_nonblank_block_type && $interrupted_list[$current_depth] && $next_nonblank_type eq '{' && !$rOpts_opening_brace_always_on_right ) { $self->set_forced_breakpoint($i); } #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; #----------------------------------------------------------------- # Set breaks at commas to display a table of values if appropriate #----------------------------------------------------------------- my ( $bp_count, $do_not_break_apart ) = ( 0, 0 ); ( $bp_count, $do_not_break_apart ) = $self->set_comma_breakpoints( $current_depth, $rbond_strength_bias ) if ( $item_count_stack[$current_depth] ); #----------------------------------------------------------- # Now set flags needed to decide if we should break open the # container ... This is a long rambling section which has # grown over time to handle all situations. #----------------------------------------------------------- my $i_opening = $opening_structure_index_stack[$current_depth]; my $saw_opening_structure = ( $i_opening >= 0 ); my $lp_object; if ( $rOpts_line_up_parentheses && $saw_opening_structure ) { $lp_object = $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$i_opening] }; } # this term is long if we had to break at interior commas.. my $is_long_term = $bp_count > 0; # If this is a short container with one or more comma arrows, # then we will mark it as a long term to open it if requested. # $rOpts_comma_arrow_breakpoints = # 0 - open only if comma precedes closing brace # 1 - stable: except for one line blocks # 2 - try to form 1 line blocks # 3 - ignore => # 4 - always open up if vt=0 # 5 - stable: even for one line blocks if vt=0 my $cab_flag = $rOpts_comma_arrow_breakpoints; # replace -cab=3 if overriden if ( $cab_flag == 3 && $type_sequence ) { my $test_cab = $self->[_roverride_cab3_]->{$type_sequence}; if ( defined($test_cab) ) { $cab_flag = $test_cab } } # PATCH: Modify the -cab flag if we are not processing a list: # We only want the -cab flag to apply to list containers, so # for non-lists we use the default and stable -cab=5 value. # Fixes case b939a. if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} ) { $cab_flag = 5; } # Ignore old breakpoints when under stress. # Fixes b1203 b1204 as well as b1197-b1200. # But not if -lp: fixes b1264, b1265. NOTE: rechecked with # b1264 to see if this check is still required at all, and # these still require a check, but at higher level beta+3 # instead of beta: b1193 b780 if ( $saw_opening_structure && !$lp_object && $levels_to_go[$i_opening] >= $high_stress_level ) { $cab_flag = 2; # Do not break hash braces under stress (fixes b1238) $do_not_break_apart ||= $types_to_go[$i_opening] eq 'L'; # This option fixes b1235, b1237, b1240 with old and new # -lp, but formatting is nicer with next option. ## $is_long_term ||= ## $levels_to_go[$i_opening] > $stress_level_beta + 1; # This option fixes b1240 but not b1235, b1237 with new -lp, # but this gives better formatting than the previous option. # TODO: see if stress_level_alpha should also be considered $do_not_break_apart ||= $levels_to_go[$i_opening] > $stress_level_beta; } if ( !$is_long_term && $saw_opening_structure && $is_opening_token{ $tokens_to_go[$i_opening] } && $index_before_arrow[ $depth + 1 ] > 0 && !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } ) { $is_long_term = $cab_flag == 4 || $cab_flag == 0 && $last_nonblank_token eq ',' || $cab_flag == 5 && $old_breakpoint_to_go[$i_opening]; } # mark term as long if the length between opening and closing # parens exceeds allowed line length if ( !$is_long_term && $saw_opening_structure ) { my $i_opening_minus = $self->find_token_starting_list($i_opening); my $excess = $self->excess_line_length( $i_opening_minus, $i ); # Use standard spaces for indentation of lists in -lp mode # if it gives a longer line length. This helps to avoid an # instability due to forming and breaking one-line blocks. # This fixes case b1314. my $indentation = $leading_spaces_to_go[$i_opening_minus]; if ( ref($indentation) && $self->[_ris_broken_container_]->{$type_sequence} ) { my $lp_spaces = $indentation->get_spaces(); my $std_spaces = $indentation->get_standard_spaces(); my $diff = $std_spaces - $lp_spaces; if ( $diff > 0 ) { $excess += $diff } } my $tol = $length_tol; # boost tol for an -lp container if ( $lp_tol_boost && $lp_object && ( $rOpts_extended_continuation_indentation || !$self->[_ris_list_by_seqno_]->{$type_sequence} ) ) { $tol += $lp_tol_boost; } # Patch to avoid blinking with -bbxi=2 and -cab=2 # in which variations in -ci cause unstable formatting # in edge cases. We just always add one ci level so that # the formatting is independent of the -BBX results. # Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160 # b1161 b1166 b1167 b1168 if ( !$ci_levels_to_go[$i_opening] && $self->[_rbreak_before_container_by_seqno_]->{$type_sequence} ) { $tol += $rOpts_continuation_indentation; } $is_long_term = $excess + $tol > 0; } # We've set breaks after all comma-arrows. Now we have to # undo them if this can be a one-line block # (the only breakpoints set will be due to comma-arrows) if ( # user doesn't require breaking after all comma-arrows ( $cab_flag != 0 ) && ( $cab_flag != 4 ) # and if the opening structure is in this batch && $saw_opening_structure # and either on the same old line && ( $old_breakpoint_count_stack[$current_depth] == $last_old_breakpoint_count # or user wants to form long blocks with arrows || $cab_flag == 2 ) # and we made breakpoints between the opening and closing && ( $breakpoint_undo_stack[$current_depth] < $forced_breakpoint_undo_count ) # and this block is short enough to fit on one line # Note: use < because need 1 more space for possible comma && !$is_long_term ) { $self->undo_forced_breakpoint_stack( $breakpoint_undo_stack[$current_depth] ); } # now see if we have any comma breakpoints left my $has_comma_breakpoints = ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count ); # update broken-sublist flag of the outer container $has_broken_sublist[$depth] = $has_broken_sublist[$depth] || $has_broken_sublist[$current_depth] || $is_long_term || $has_comma_breakpoints; # Having come to the closing ')', '}', or ']', now we have to decide # if we should 'open up' the structure by placing breaks at the # opening and closing containers. This is a tricky decision. Here # are some of the basic considerations: # # -If this is a BLOCK container, then any breakpoints will have # already been set (and according to user preferences), so we need do # nothing here. # # -If we have a comma-separated list for which we can align the list # items, then we need to do so because otherwise the vertical aligner # cannot currently do the alignment. # # -If this container does itself contain a container which has been # broken open, then it should be broken open to properly show the # structure. # # -If there is nothing to align, and no other reason to break apart, # then do not do it. # # We will not break open the parens of a long but 'simple' logical # expression. For example: # # This is an example of a simple logical expression and its formatting: # # if ( $bigwasteofspace1 && $bigwasteofspace2 # || $bigwasteofspace3 && $bigwasteofspace4 ) # # Most people would prefer this than the 'spacey' version: # # if ( # $bigwasteofspace1 && $bigwasteofspace2 # || $bigwasteofspace3 && $bigwasteofspace4 # ) # # To illustrate the rules for breaking logical expressions, consider: # # FULLY DENSE: # if ( $opt_excl # and ( exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc )) # # This is on the verge of being difficult to read. The current # default is to open it up like this: # # DEFAULT: # if ( # $opt_excl # and ( exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc ) # ) # # This is a compromise which tries to avoid being too dense and to # spacey. A more spaced version would be: # # SPACEY: # if ( # $opt_excl # and ( # exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc # ) # ) # # Some people might prefer the spacey version -- an option could be # added. The innermost expression contains a long block '( exists # $ids_... ')'. # # Here is how the logic goes: We will force a break at the 'or' that # the innermost expression contains, but we will not break apart its # opening and closing containers because (1) it contains no # multi-line sub-containers itself, and (2) there is no alignment to # be gained by breaking it open like this # # and ( # exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc # ) # # (although this looks perfectly ok and might be good for long # expressions). The outer 'if' container, though, contains a broken # sub-container, so it will be broken open to avoid too much density. # Also, since it contains no 'or's, there will be a forced break at # its 'and'. # Handle the experimental flag --break-open-compact-parens # NOTE: This flag is not currently used and may eventually be removed. # If this flag is set, we will implement it by # pretending we did not see the opening structure, since in that case # parens always get opened up. if ( $saw_opening_structure && $rOpts_break_open_compact_parens ) { # This parameter is a one-character flag, as follows: # '0' matches no parens -> break open NOT OK # '1' matches all parens -> break open OK # Other values are same as used by the weld-exclusion-list my $flag = $rOpts_break_open_compact_parens; if ( $flag eq '*' || $flag eq '1' ) { $saw_opening_structure = 0; } else { # NOTE: $seqno will be equal to closure var $type_sequence here my $seqno = $type_sequence_to_go[$i_opening]; $saw_opening_structure = !$self->match_paren_control_flag( $seqno, $flag ); } } # Set some more flags telling something about this container.. my $is_simple_logical_expression; if ( $item_count_stack[$current_depth] == 0 && $saw_opening_structure && $tokens_to_go[$i_opening] eq '(' && $is_logical_container{ $container_type[$current_depth] } ) { # This seems to be a simple logical expression with # no existing breakpoints. Set a flag to prevent # opening it up. if ( !$has_comma_breakpoints ) { $is_simple_logical_expression = 1; } #--------------------------------------------------- # This seems to be a simple logical expression with # breakpoints (broken sublists, for example). Break # at all 'or's and '||'s. #--------------------------------------------------- else { $self->set_logical_breakpoints($current_depth); } } # break long terms at any C-style for semicolons (c154) if ( $is_long_term && @{ $rfor_semicolon_list[$current_depth] } ) { $self->set_for_semicolon_breakpoints($current_depth); # and open up a long 'for' or 'foreach' container to allow # leading term alignment unless -lp is used. $has_comma_breakpoints = 1 unless ($lp_object); } #---------------------------------------------------------------- # FINALLY: Break open container according to the flags which have # been set. #---------------------------------------------------------------- if ( # breaks for code BLOCKS are handled at a higher level !$block_type # we do not need to break at the top level of an 'if' # type expression && !$is_simple_logical_expression ## modification to keep ': (' containers vertically tight; ## but probably better to let user set -vt=1 to avoid ## inconsistency with other paren types ## && ($container_type[$current_depth] ne ':') # otherwise, we require one of these reasons for breaking: && ( # - this term has forced line breaks $has_comma_breakpoints # - the opening container is separated from this batch # for some reason (comment, blank line, code block) # - this is a non-paren container spanning multiple lines || !$saw_opening_structure # - this is a long block contained in another breakable # container || $is_long_term && !$self->is_in_block_by_i($i_opening) ) ) { # do special -lp breaks at the CLOSING token for INTACT # blocks (because we might not do them if the block does # not break open) if ($lp_object) { my $K_begin_line = $lp_object->get_K_begin_line(); my $i_begin_line = $K_begin_line - $K_to_go[0]; $self->set_forced_lp_break( $i_begin_line, $i_opening ); } # break after opening structure. # note: break before closing structure will be automatic if ( $minimum_depth <= $current_depth ) { if ( $i_opening >= 0 ) { if ( !$do_not_break_apart && !is_unbreakable_container($current_depth) ) { $self->set_forced_breakpoint($i_opening); # Do not let brace types L/R use vertical tightness # flags to recombine if we have to break on length # because instability is possible if both vt and vtc # flags are set ... see issue b1444. if ( $is_long_term && $types_to_go[$i_opening] eq 'L' && $opening_vertical_tightness{'{'} && $closing_vertical_tightness{'}'} ) { my $seqno = $type_sequence_to_go[$i_opening]; if ($seqno) { $self->[_rbreak_container_]->{$seqno} = 1; } } } } # break at ',' of lower depth level before opening token if ( $last_comma_index[$depth] ) { $self->set_forced_breakpoint( $last_comma_index[$depth] ); } # break at '.' of lower depth level before opening token if ( $last_dot_index[$depth] ) { $self->set_forced_breakpoint( $last_dot_index[$depth] ); } # break before opening structure if preceded by another # closing structure and a comma. This is normally # done by the previous closing brace, but not # if it was a one-line block. if ( $i_opening > 2 ) { my $i_prev = ( $types_to_go[ $i_opening - 1 ] eq 'b' ) ? $i_opening - 2 : $i_opening - 1; my $type_prev = $types_to_go[$i_prev]; my $token_prev = $tokens_to_go[$i_prev]; if ( $type_prev eq ',' && ( $types_to_go[ $i_prev - 1 ] eq ')' || $types_to_go[ $i_prev - 1 ] eq '}' ) ) { $self->set_forced_breakpoint($i_prev); } # also break before something like ':(' or '?(' # if appropriate. elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/ && $want_break_before{$token_prev} ) { $self->set_forced_breakpoint($i_prev); } else { ## not a breakpoint } } } # break after comma following closing structure if ( $types_to_go[ $i + 1 ] eq ',' ) { $self->set_forced_breakpoint( $i + 1 ); } # break before an '=' following closing structure if ( $is_assignment{$next_nonblank_type} && ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count ) ) { $self->set_forced_breakpoint($i); } # break at any comma before the opening structure Added # for -lp, but seems to be good in general. It isn't # obvious how far back to look; the '5' below seems to # work well and will catch the comma in something like # push @list, myfunc( $param, $param, .. my $icomma = $last_comma_index[$depth]; if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) { if ( !$forced_breakpoint_to_go[$icomma] ) { $self->set_forced_breakpoint($icomma); } } } #----------------------------------------------------------- # Break open a logical container open if it was already open #----------------------------------------------------------- elsif ($is_simple_logical_expression && $has_old_logical_breakpoints[$current_depth] ) { $self->set_logical_breakpoints($current_depth); } # Handle long container which does not get opened up elsif ($is_long_term) { # must set fake breakpoint to alert outer containers that # they are complex set_fake_breakpoint(); } else { ## do not break open } return; } ## end sub break_lists_decreasing_depth } ## end closure break_lists my %is_kwiZ; my %is_key_type; BEGIN { # Added 'w' to fix b1172 my @q = qw(k w i Z ->); @is_kwiZ{@q} = (1) x scalar(@q); # added = for b1211 @q = qw<( [ { L R } ] ) = b>; push @q, ','; @is_key_type{@q} = (1) x scalar(@q); } ## end BEGIN use constant DEBUG_FIND_START => 0; sub find_token_starting_list { # When testing to see if a block will fit on one line, some # previous token(s) may also need to be on the line; particularly # if this is a sub call. So we will look back at least one # token. my ( $self, $i_opening_paren ) = @_; # This will be the return index my $i_opening_minus = $i_opening_paren; if ( $i_opening_minus <= 0 ) { return $i_opening_minus; } my $im1 = $i_opening_paren - 1; my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] ); if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) { $iprev_nb -= 1; $type_prev_nb = $types_to_go[$iprev_nb]; } if ( $type_prev_nb eq ',' ) { # a previous comma is a good break point # $i_opening_minus = $i_opening_paren; } elsif ( $tokens_to_go[$i_opening_paren] eq '(' # non-parens added here to fix case b1186 || $is_kwiZ{$type_prev_nb} ) { $i_opening_minus = $im1; # Walk back to improve length estimate... # FIX for cases b1169 b1170 b1171: start walking back # at the previous nonblank. This makes the result insensitive # to the flag --space-function-paren, and similar. # previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) { foreach my $j ( reverse( 0 .. $iprev_nb ) ) { if ( $is_key_type{ $types_to_go[$j] } ) { # fix for b1211 if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j } last; } $i_opening_minus = $j; } if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ } } else { ## previous token not special } DEBUG_FIND_START && print <<EOM; FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus] EOM return $i_opening_minus; } ## end sub find_token_starting_list { ## begin closure table_maker my %is_keyword_with_special_leading_term; BEGIN { # These keywords have prototypes which allow a special leading item # followed by a list my @q = qw( chmod formline grep join kill map pack printf push sprintf unshift ); @is_keyword_with_special_leading_term{@q} = (1) x scalar(@q); } ## end BEGIN use constant DEBUG_SPARSE => 0; sub table_maker { # Given a list of comma-separated items, set breakpoints at some of # the commas, if necessary, to make it easy to read. # This is done by making calls to 'set_forced_breakpoint'. # This is a complex routine because there are many special cases. # Returns: nothing # The numerous variables involved are contained three hashes: # $rhash_IN : For contents see the calling routine # $rhash_A: For contents see return from sub 'table_layout_A' # $rhash_B: For contents see return from sub 'table_layout_B' my ( $self, $rhash_IN ) = @_; # Find lengths of all list items needed for calculating page layout my $rhash_A = table_layout_A($rhash_IN); return if ( !defined($rhash_A) ); # Some variables received from caller... my $i_closing_paren = $rhash_IN->{i_closing_paren}; my $i_opening_paren = $rhash_IN->{i_opening_paren}; my $has_broken_sublist = $rhash_IN->{has_broken_sublist}; my $interrupted = $rhash_IN->{interrupted}; #----------------------------------------- # Section A: Handle some special cases ... #----------------------------------------- #------------------------------------------------------------- # Special Case A1: Compound List Rule 1: # Break at (almost) every comma for a list containing a broken # sublist. This has higher priority than the Interrupted List # Rule. #------------------------------------------------------------- if ($has_broken_sublist) { $self->apply_broken_sublist_rule( $rhash_A, $interrupted ); return; } #-------------------------------------------------------------- # Special Case A2: Interrupted List Rule: # A list is forced to use old breakpoints if it was interrupted # by side comments or blank lines, or requested by user. #-------------------------------------------------------------- if ( $rOpts_break_at_old_comma_breakpoints || $interrupted || $i_opening_paren < 0 ) { my $i_first_comma = $rhash_A->{_i_first_comma}; my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; $self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma ); return; } #----------------------------------------------------------------- # Special Case A3: If it fits on one line, return and let the line # break logic decide if and where to break. #----------------------------------------------------------------- # The -bbxi=2 parameters can add an extra hidden level of indentation # so they need a tolerance to avoid instability. Fixes b1259, 1260. my $opening_token = $tokens_to_go[$i_opening_paren]; my $tol = 0; if ( $break_before_container_types{$opening_token} && $container_indentation_options{$opening_token} && $container_indentation_options{$opening_token} == 2 ) { $tol = $rOpts_indent_columns; # use greater of -ci and -i (fix for case b1334) if ( $tol < $rOpts_continuation_indentation ) { $tol = $rOpts_continuation_indentation; } } # Increase tol when -atc and -dtc are both used to allow for # possible loss in length on next pass due to a comma. Fixes b1455. if ( $rOpts_delete_trailing_commas && $rOpts_add_trailing_commas ) { $tol += 1; } my $i_opening_minus = $self->find_token_starting_list($i_opening_paren); my $excess = $self->excess_line_length( $i_opening_minus, $i_closing_paren ); return if ( $excess + $tol <= 0 ); #--------------------------------------- # Section B: Handle a multiline list ... #--------------------------------------- $self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus ); return; } ## end sub table_maker sub apply_broken_sublist_rule { my ( $self, $rhash_A, $interrupted ) = @_; # Break at (almost) every comma for a list containing a broken # sublist. my $ritem_lengths = $rhash_A->{_ritem_lengths}; my $ri_term_begin = $rhash_A->{_ri_term_begin}; my $ri_term_end = $rhash_A->{_ri_term_end}; my $ri_term_comma = $rhash_A->{_ri_term_comma}; my $item_count = $rhash_A->{_item_count_A}; my $i_first_comma = $rhash_A->{_i_first_comma}; my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; # Break at every comma except for a comma between two # simple, small terms. This prevents long vertical # columns of, say, just 0's. my $small_length = 10; # 2 + actual maximum length wanted # We'll insert a break in long runs of small terms to # allow alignment in uniform tables. my $skipped_count = 0; my $columns = table_columns_available($i_first_comma); my $fields = int( $columns / $small_length ); if ( $rOpts_maximum_fields_per_table && $fields > $rOpts_maximum_fields_per_table ) { $fields = $rOpts_maximum_fields_per_table; } my $max_skipped_count = $fields - 1; my $is_simple_last_term = 0; my $is_simple_next_term = 0; foreach my $j ( 0 .. $item_count ) { $is_simple_last_term = $is_simple_next_term; $is_simple_next_term = 0; if ( $j < $item_count && $ri_term_end->[$j] == $ri_term_begin->[$j] && $ritem_lengths->[$j] <= $small_length ) { $is_simple_next_term = 1; } next if $j == 0; if ( $is_simple_last_term && $is_simple_next_term && $skipped_count < $max_skipped_count ) { $skipped_count++; } else { $skipped_count = 0; my $i_tc = $ri_term_comma->[ $j - 1 ]; last unless defined $i_tc; $self->set_forced_breakpoint($i_tc); } } # always break at the last comma if this list is # interrupted; we wouldn't want to leave a terminal '{', for # example. if ($interrupted) { $self->set_forced_breakpoint($i_true_last_comma); } return; } ## end sub apply_broken_sublist_rule sub set_emergency_comma_breakpoints { my ( $self, # $number_of_fields_best, $rhash_IN, $comma_count, $i_first_comma, ) = @_; # The computed number of table fields is negative, so we have to make # an emergency fix. my $rcomma_index = $rhash_IN->{rcomma_index}; my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; my $must_break_open = $rhash_IN->{must_break_open}; # are we an item contained in an outer list? my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; # In many cases, it may be best to not force a break if there is just # one comma, because the standard continuation break logic will do a # better job without it. # In the common case that all but one of the terms can fit # on a single line, it may look better not to break open the # containing parens. Consider, for example # $color = # join ( '/', # sort { $color_value{$::a} <=> $color_value{$::b}; } # keys %colors ); # which will look like this with the container broken: # $color = join ( # '/', # sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors # ); # Here is an example of this rule for a long last term: # log_message( 0, 256, 128, # "Number of routes in adj-RIB-in to be considered: $peercount" ); # And here is an example with a long first term: # $s = sprintf( # "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)", # $r, $pu, $ps, $cu, $cs, $tt # ) # if $style eq 'all'; my $i_last_comma = $rcomma_index->[ $comma_count - 1 ]; my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0; my $long_first_term = $self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0; # break at every comma ... if ( # if requested by user or is best looking $number_of_fields_best == 1 # or if this is a sublist of a larger list || $in_hierarchical_list # or if multiple commas and we don't have a long first or last # term || ( $comma_count > 1 && !( $long_last_term || $long_first_term ) ) ) { foreach ( 0 .. $comma_count - 1 ) { $self->set_forced_breakpoint( $rcomma_index->[$_] ); } } elsif ($long_last_term) { $self->set_forced_breakpoint($i_last_comma); ${$rdo_not_break_apart} = 1 unless $must_break_open; } elsif ($long_first_term) { $self->set_forced_breakpoint($i_first_comma); } else { # let breaks be defined by default bond strength logic } return; } ## end sub set_emergency_comma_breakpoints sub break_multiline_list { my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_; # We have a list spanning multiple lines and are trying # to decide the best way to set comma breakpoints. # Overriden variables my $item_count = $rhash_A->{_item_count_A}; my $identifier_count = $rhash_A->{_identifier_count_A}; # Derived variables: my $ritem_lengths = $rhash_A->{_ritem_lengths}; my $ri_term_begin = $rhash_A->{_ri_term_begin}; my $ri_term_end = $rhash_A->{_ri_term_end}; my $ri_term_comma = $rhash_A->{_ri_term_comma}; my $rmax_length = $rhash_A->{_rmax_length}; my $comma_count = $rhash_A->{_comma_count}; my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma}; my $first_term_length = $rhash_A->{_first_term_length}; my $i_first_comma = $rhash_A->{_i_first_comma}; my $i_last_comma = $rhash_A->{_i_last_comma}; my $i_true_last_comma = $rhash_A->{_i_true_last_comma}; # Variables received from caller my $i_opening_paren = $rhash_IN->{i_opening_paren}; my $i_closing_paren = $rhash_IN->{i_closing_paren}; my $rcomma_index = $rhash_IN->{rcomma_index}; my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; my $list_type = $rhash_IN->{list_type}; my $interrupted = $rhash_IN->{interrupted}; my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; my $must_break_open = $rhash_IN->{must_break_open}; ## NOTE: these input vars from caller use the values from rhash_A (see above): ## my $item_count = $rhash_IN->{item_count}; ## my $identifier_count = $rhash_IN->{identifier_count}; # NOTE: i_opening_paren changes value below so we need to get these here my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren); my $opening_token = $tokens_to_go[$i_opening_paren]; #--------------------------------------------------------------- # Section B1: Determine '$number_of_fields' = the best number of # fields to use if this is to be formatted as a table. #--------------------------------------------------------------- # Now we know that this block spans multiple lines; we have to set # at least one breakpoint -- real or fake -- as a signal to break # open any outer containers. set_fake_breakpoint(); # Set a flag indicating if we need to break open to keep -lp # items aligned. This is necessary if any of the list terms # exceeds the available space after the '('. my $need_lp_break_open = $must_break_open; my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] ); if ( $is_lp_formatting && !$must_break_open ) { my $columns_if_unbroken = $maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ] - total_line_length( $i_opening_minus, $i_opening_paren ); $need_lp_break_open = ( $rmax_length->[0] > $columns_if_unbroken ) || ( $rmax_length->[1] > $columns_if_unbroken ) || ( $first_term_length > $columns_if_unbroken ); } my $hash_B = $self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting ); return if ( !defined($hash_B) ); # Updated variables $i_first_comma = $hash_B->{_i_first_comma_B}; $i_opening_paren = $hash_B->{_i_opening_paren_B}; $item_count = $hash_B->{_item_count_B}; # New variables my $columns = $hash_B->{_columns}; my $formatted_columns = $hash_B->{_formatted_columns}; my $formatted_lines = $hash_B->{_formatted_lines}; my $max_width = $hash_B->{_max_width}; my $new_identifier_count = $hash_B->{_new_identifier_count}; my $number_of_fields = $hash_B->{_number_of_fields}; my $odd_or_even = $hash_B->{_odd_or_even}; my $packed_columns = $hash_B->{_packed_columns}; my $packed_lines = $hash_B->{_packed_lines}; my $pair_width = $hash_B->{_pair_width}; my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list}; my $use_separate_first_term = $hash_B->{_use_separate_first_term}; # are we an item contained in an outer list? my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/; my $unused_columns = $formatted_columns - $packed_columns; # set some empirical parameters to help decide if we should try to # align; high sparsity does not look good, especially with few lines my $sparsity = ($unused_columns) / ($formatted_columns); my $max_allowed_sparsity = ( $item_count < 3 ) ? 0.1 : ( $packed_lines == 1 ) ? 0.15 : ( $packed_lines == 2 ) ? 0.4 : 0.7; my $two_line_word_wrap_ok; if ( $opening_token eq '(' ) { # default is to allow wrapping of short paren lists $two_line_word_wrap_ok = 1; # but turn off word wrap where requested if ($rOpts_break_open_compact_parens) { # This parameter is a one-character flag, as follows: # '0' matches no parens -> break open NOT OK -> word wrap OK # '1' matches all parens -> break open OK -> word wrap NOT OK # Other values are the same as used by the weld-exclusion-list my $flag = $rOpts_break_open_compact_parens; if ( $flag eq '*' || $flag eq '1' ) { $two_line_word_wrap_ok = 0; } elsif ( $flag eq '0' ) { $two_line_word_wrap_ok = 1; } else { my $seqno = $type_sequence_to_go[$i_opening_paren]; $two_line_word_wrap_ok = !$self->match_paren_control_flag( $seqno, $flag ); } } } #------------------------------------------------------------------- # Section B2: Check for shortcut methods, which avoid treating # a list as a table for relatively small parenthesized lists. These # are usually easier to read if not formatted as tables. #------------------------------------------------------------------- if ( $packed_lines <= 2 # probably can fit in 2 lines && $item_count < 9 # doesn't have too many items && $opening_is_in_block # not a sub-container && $two_line_word_wrap_ok # ok to wrap this paren list ) { # Section B2A: Shortcut method 1: for -lp and just one comma: # This is a no-brainer, just break at the comma. if ( $is_lp_formatting # -lp && $item_count == 2 # two items, one comma && !$must_break_open ) { my $i_break = $rcomma_index->[0]; $self->set_forced_breakpoint($i_break); ${$rdo_not_break_apart} = 1; return; } # Section B2B: Shortcut method 2 is for most small ragged lists # which might look best if not displayed as a table. if ( ( $number_of_fields == 2 && $item_count == 3 ) || ( $new_identifier_count > 0 # isn't all quotes && $sparsity > 0.15 ) # would be fairly spaced gaps if aligned ) { my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, $ri_ragged_break_list ); ++$break_count if ($use_separate_first_term); # NOTE: we should really use the true break count here, # which can be greater if there are large terms and # little space, but usually this will work well enough. if ( !$must_break_open ) { if ( $break_count <= 1 || ( $is_lp_formatting && !$need_lp_break_open ) ) { ${$rdo_not_break_apart} = 1; } } return; } } ## end shortcut methods # debug stuff DEBUG_SPARSE && do { # How many spaces across the page will we fill? my $columns_per_line = ( int $number_of_fields / 2 ) * $pair_width + ( $number_of_fields % 2 ) * $max_width; print {*STDOUT} "SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n"; }; #------------------------------------------------------------------ # Section B3: Compound List Rule 2: # If this list is too long for one line, and it is an item of a # larger list, then we must format it, regardless of sparsity # (ian.t). One reason that we have to do this is to trigger # Compound List Rule 1, above, which causes breaks at all commas of # all outer lists. In this way, the structure will be properly # displayed. #------------------------------------------------------------------ # Decide if this list is too long for one line unless broken my $total_columns = table_columns_available($i_opening_paren); my $too_long = $packed_columns > $total_columns; # For a paren list, include the length of the token just before the # '(' because this is likely a sub call, and we would have to # include the sub name on the same line as the list. This is still # imprecise, but not too bad. (steve.t) if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) { $too_long = $self->excess_line_length( $i_opening_minus, $i_effective_last_comma + 1 ) > 0; } # TODO: For an item after a '=>', try to include the length of the # thing before the '=>'. This is crude and should be improved by # actually looking back token by token. if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) { my $i_opening_minus_test = $i_opening_paren - 4; if ( $i_opening_minus >= 0 ) { $too_long = $self->excess_line_length( $i_opening_minus_test, $i_effective_last_comma + 1 ) > 0; } } # Always break lists contained in '[' and '{' if too long for 1 line, # and always break lists which are too long and part of a more complex # structure. my $must_break_open_container = $must_break_open || ( $too_long && ( $in_hierarchical_list || !$two_line_word_wrap_ok ) ); #-------------------------------------------------------------------- # Section B4: A table will work here. But do not attempt to align # columns if this is a tiny table or it would be too spaced. It # seems that the more packed lines we have, the sparser the list that # can be allowed and still look ok. #-------------------------------------------------------------------- if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines ) || ( $formatted_lines < 2 ) || ( $unused_columns > $max_allowed_sparsity * $formatted_columns ) ) { #---------------------------------------------------------------- # Section B4A: too sparse: would not look good aligned in a table #---------------------------------------------------------------- # use old breakpoints if this is a 'big' list if ( $packed_lines > 2 && $item_count > 10 ) { write_logfile_entry("List sparse: using old breakpoints\n"); $self->copy_old_breakpoints( $i_first_comma, $i_last_comma ); } # let the continuation logic handle it if 2 lines else { my $break_count = $self->set_ragged_breakpoints( $ri_term_comma, $ri_ragged_break_list ); ++$break_count if ($use_separate_first_term); if ( !$must_break_open_container ) { if ( $break_count <= 1 || ( $is_lp_formatting && !$need_lp_break_open ) ) { ${$rdo_not_break_apart} = 1; } } } return; } #-------------------------------------------- # Section B4B: Go ahead and format as a table #-------------------------------------------- $self->write_formatted_table( $number_of_fields, $comma_count, $rcomma_index, $use_separate_first_term ); return; } ## end sub break_multiline_list sub table_layout_A { my ($rhash_IN) = @_; # Find lengths of all list items needed to calculate page layout # Returns: # - nothing if this list is empty, or # - a ref to a hash containing some derived parameters my $i_opening_paren = $rhash_IN->{i_opening_paren}; my $i_closing_paren = $rhash_IN->{i_closing_paren}; my $identifier_count = $rhash_IN->{identifier_count}; my $rcomma_index = $rhash_IN->{rcomma_index}; my $item_count = $rhash_IN->{item_count}; # nothing to do if no commas seen return if ( $item_count < 1 ); my $i_first_comma = $rcomma_index->[0]; my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ]; my $i_last_comma = $i_true_last_comma; if ( $i_last_comma >= $max_index_to_go ) { $item_count -= 1; return if ( $item_count < 1 ); $i_last_comma = $rcomma_index->[ $item_count - 1 ]; } my $comma_count = $item_count; my $ritem_lengths = []; my $ri_term_begin = []; my $ri_term_end = []; my $ri_term_comma = []; my $rmax_length = [ 0, 0 ]; my $i_prev_plus; my $first_term_length; my $i = $i_opening_paren; my $is_odd = 1; foreach my $j ( 0 .. $comma_count - 1 ) { $is_odd = 1 - $is_odd; $i_prev_plus = $i + 1; $i = $rcomma_index->[$j]; my $i_term_end = ( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1; my $i_term_begin = ( $types_to_go[$i_prev_plus] eq 'b' ) ? $i_prev_plus + 1 : $i_prev_plus; push @{$ri_term_begin}, $i_term_begin; push @{$ri_term_end}, $i_term_end; push @{$ri_term_comma}, $i; # note: currently adding 2 to all lengths (for comma and space) my $length = 2 + token_sequence_length( $i_term_begin, $i_term_end ); push @{$ritem_lengths}, $length; if ( $j == 0 ) { $first_term_length = $length; } else { if ( $length > $rmax_length->[$is_odd] ) { $rmax_length->[$is_odd] = $length; } } } # now we have to make a distinction between the comma count and item # count, because the item count will be one greater than the comma # count if the last item is not terminated with a comma my $i_b = ( $types_to_go[ $i_last_comma + 1 ] eq 'b' ) ? $i_last_comma + 1 : $i_last_comma; my $i_e = ( $types_to_go[ $i_closing_paren - 1 ] eq 'b' ) ? $i_closing_paren - 2 : $i_closing_paren - 1; my $i_effective_last_comma = $i_last_comma; my $last_item_length = token_sequence_length( $i_b + 1, $i_e ); if ( $last_item_length > 0 ) { # add 2 to length because other lengths include a comma and a blank $last_item_length += 2; push @{$ritem_lengths}, $last_item_length; push @{$ri_term_begin}, $i_b + 1; push @{$ri_term_end}, $i_e; push @{$ri_term_comma}, undef; my $i_odd = $item_count % 2; if ( $last_item_length > $rmax_length->[$i_odd] ) { $rmax_length->[$i_odd] = $last_item_length; } $item_count++; $i_effective_last_comma = $i_e + 1; if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) { $identifier_count++; } } # be sure we do not extend beyond the current list length if ( $i_effective_last_comma >= $max_index_to_go ) { $i_effective_last_comma = $max_index_to_go - 1; } # Return the hash of derived variables. return { # Updated variables _item_count_A => $item_count, _identifier_count_A => $identifier_count, # New variables _ritem_lengths => $ritem_lengths, _ri_term_begin => $ri_term_begin, _ri_term_end => $ri_term_end, _ri_term_comma => $ri_term_comma, _rmax_length => $rmax_length, _comma_count => $comma_count, _i_effective_last_comma => $i_effective_last_comma, _first_term_length => $first_term_length, _i_first_comma => $i_first_comma, _i_last_comma => $i_last_comma, _i_true_last_comma => $i_true_last_comma, }; } ## end sub table_layout_A sub table_layout_B { my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_; # Determine variables for the best table layout, including # the best number of fields. # Returns: # - nothing if nothing more to do # - a ref to a hash containg some derived parameters # Variables from caller my $i_opening_paren = $rhash_IN->{i_opening_paren}; my $list_type = $rhash_IN->{list_type}; my $next_nonblank_type = $rhash_IN->{next_nonblank_type}; my $rcomma_index = $rhash_IN->{rcomma_index}; my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart}; # Table size variables my $comma_count = $rhash_A->{_comma_count}; my $first_term_length = $rhash_A->{_first_term_length}; my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma}; my $i_first_comma = $rhash_A->{_i_first_comma}; my $identifier_count = $rhash_A->{_identifier_count_A}; my $item_count = $rhash_A->{_item_count_A}; my $ri_term_begin = $rhash_A->{_ri_term_begin}; my $ri_term_comma = $rhash_A->{_ri_term_comma}; my $ri_term_end = $rhash_A->{_ri_term_end}; my $ritem_lengths = $rhash_A->{_ritem_lengths}; my $rmax_length = $rhash_A->{_rmax_length}; # Specify if the list must have an even number of fields or not. # It is generally safest to assume an even number, because the # list items might be a hash list. But if we can be sure that # it is not a hash, then we can allow an odd number for more # flexibility. # 1 = odd field count ok, 2 = want even count my $odd_or_even = 2; if ( $identifier_count >= $item_count - 1 || $is_assignment{$next_nonblank_type} || ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ ) ) { $odd_or_even = 1; } # do we have a long first term which should be # left on a line by itself? my $use_separate_first_term = ( $odd_or_even == 1 # only if we can use 1 field/line && $item_count > 3 # need several items && $first_term_length > 2 * $rmax_length->[0] - 2 # need long first term && $first_term_length > 2 * $rmax_length->[1] - 2 # need long first term ); # or do we know from the type of list that the first term should # be placed alone? if ( !$use_separate_first_term ) { if ( $is_keyword_with_special_leading_term{$list_type} ) { $use_separate_first_term = 1; # should the container be broken open? if ( $item_count < 3 ) { if ( $i_first_comma - $i_opening_paren < 4 ) { ${$rdo_not_break_apart} = 1; } } elsif ($first_term_length < 20 && $i_first_comma - $i_opening_paren < 4 ) { my $columns = table_columns_available($i_first_comma); if ( $first_term_length < $columns ) { ${$rdo_not_break_apart} = 1; } } else { ## ok } } } # if so, if ($use_separate_first_term) { # ..set a break and update starting values $self->set_forced_breakpoint($i_first_comma); $item_count--; #--------------------------------------------------------------- # Section B1A: Stop if one item remains ($i_first_comma = undef) #--------------------------------------------------------------- # Fix for b1442: use '$item_count' here instead of '$comma_count' # to make the result independent of any trailing comma. return if ( $item_count <= 1 ); $i_opening_paren = $i_first_comma; $i_first_comma = $rcomma_index->[1]; shift @{$ritem_lengths}; shift @{$ri_term_begin}; shift @{$ri_term_end}; shift @{$ri_term_comma}; } # if not, update the metrics to include the first term else { if ( $first_term_length > $rmax_length->[0] ) { $rmax_length->[0] = $first_term_length; } } # Field width parameters my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] ); my $max_width = ( $rmax_length->[0] > $rmax_length->[1] ) ? $rmax_length->[0] : $rmax_length->[1]; # Number of free columns across the page width for laying out tables my $columns = table_columns_available($i_first_comma); # Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable # to break after an opening paren, then the maximum line length for the # first line could be less than the later lines. So we need to reduce # the line length. Normally, we will get a break after an opening # paren, but in some cases we might not. if ( $rOpts_variable_maximum_line_length && $tokens_to_go[$i_opening_paren] eq '(' && @{$ri_term_begin} ) { my $ib = $ri_term_begin->[0]; my $type = $types_to_go[$ib]; # So far, the only known instance of this problem is when # a bareword follows an opening paren with -vmll if ( $type eq 'w' ) { # If a line starts with paren+space+terms, then its max length # could be up to ci+2-i spaces less than if the term went out # on a line after the paren. So.. my $tol_w = max( 0, 2 + $rOpts_continuation_indentation - $rOpts_indent_columns ); $columns = max( 0, $columns - $tol_w ); ## Here is the original b1210 fix, but it failed on b1216-b1218 ##my $columns2 = table_columns_available($i_opening_paren); ##$columns = min( $columns, $columns2 ); } } # Estimated maximum number of fields which fit this space. # This will be our first guess: my $number_of_fields_max = maximum_number_of_fields( $columns, $odd_or_even, $max_width, $pair_width ); my $number_of_fields = $number_of_fields_max; # Find the best-looking number of fields. # This will be our second guess, if possible. my ( $number_of_fields_best, $ri_ragged_break_list, $new_identifier_count ) = $self->study_list_complexity( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ); if ( $number_of_fields_best != 0 && $number_of_fields_best < $number_of_fields_max ) { $number_of_fields = $number_of_fields_best; } # fix b1427 elsif ($number_of_fields_best > 1 && $number_of_fields_best > $number_of_fields_max ) { $number_of_fields_best = $number_of_fields_max; } else { ## ok } # If we are crowded and the -lp option is being used, try # to undo some indentation if ( $is_lp_formatting && ( $number_of_fields == 0 || ( $number_of_fields == 1 && $number_of_fields != $number_of_fields_best ) ) ) { ( $number_of_fields, $number_of_fields_best, $columns ) = $self->lp_table_fix( $columns, $i_first_comma, $max_width, $number_of_fields, $number_of_fields_best, $odd_or_even, $pair_width, $ritem_lengths, ); } # try for one column if two won't work if ( $number_of_fields <= 0 ) { $number_of_fields = int( $columns / $max_width ); } # The user can place an upper bound on the number of fields, # which can be useful for doing maintenance on tables if ( $rOpts_maximum_fields_per_table && $number_of_fields > $rOpts_maximum_fields_per_table ) { $number_of_fields = $rOpts_maximum_fields_per_table; } # How many columns (characters) and lines would this container take # if no additional whitespace were added? my $packed_columns = token_sequence_length( $i_opening_paren + 1, $i_effective_last_comma + 1 ); if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero my $packed_lines = 1 + int( $packed_columns / $columns ); #----------------------------------------------------------------- # Section B1B: Stop here if we did not compute a positive number of # fields. In this case we just have to bail out. #----------------------------------------------------------------- if ( $number_of_fields <= 0 ) { $self->set_emergency_comma_breakpoints( $number_of_fields_best, $rhash_IN, $comma_count, $i_first_comma, ); return; } #------------------------------------------------------------------ # Section B1B: We have a tentative field count that seems to work. # Now we must look more closely to determine if a table layout will # actually look okay. #------------------------------------------------------------------ # How many lines will this require? my $formatted_lines = $item_count / ($number_of_fields); if ( $formatted_lines != int $formatted_lines ) { $formatted_lines = 1 + int $formatted_lines; } # So far we've been trying to fill out to the right margin. But # compact tables are easier to read, so let's see if we can use fewer # fields without increasing the number of lines. $number_of_fields = compactify_table( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ); my $formatted_columns; if ( $number_of_fields > 1 ) { $formatted_columns = ( $pair_width * ( int( $item_count / 2 ) ) + ( $item_count % 2 ) * $max_width ); } else { $formatted_columns = $max_width * $item_count; } if ( $formatted_columns < $packed_columns ) { $formatted_columns = $packed_columns; } # Construce hash_B: return { # Updated variables _i_first_comma_B => $i_first_comma, _i_opening_paren_B => $i_opening_paren, _item_count_B => $item_count, # New variables _columns => $columns, _formatted_columns => $formatted_columns, _formatted_lines => $formatted_lines, _max_width => $max_width, _new_identifier_count => $new_identifier_count, _number_of_fields => $number_of_fields, _odd_or_even => $odd_or_even, _packed_columns => $packed_columns, _packed_lines => $packed_lines, _pair_width => $pair_width, _ri_ragged_break_list => $ri_ragged_break_list, _use_separate_first_term => $use_separate_first_term, }; } ## end sub table_layout_B sub lp_table_fix { # try to undo some -lp indentation to improve table formatting my ( $self, # $columns, $i_first_comma, $max_width, $number_of_fields, $number_of_fields_best, $odd_or_even, $pair_width, $ritem_lengths, ) = @_; my $available_spaces = $self->get_available_spaces_to_go($i_first_comma); if ( $available_spaces > 0 ) { my $spaces_wanted = $max_width - $columns; # for 1 field if ( $number_of_fields_best == 0 ) { $number_of_fields_best = get_maximum_fields_wanted($ritem_lengths); } if ( $number_of_fields_best != 1 ) { my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields if ( $available_spaces > $spaces_wanted_2 ) { $spaces_wanted = $spaces_wanted_2; } } if ( $spaces_wanted > 0 ) { my $deleted_spaces = $self->reduce_lp_indentation( $i_first_comma, $spaces_wanted ); # redo the math if ( $deleted_spaces > 0 ) { $columns = table_columns_available($i_first_comma); $number_of_fields = maximum_number_of_fields( $columns, $odd_or_even, $max_width, $pair_width ); if ( $number_of_fields_best == 1 && $number_of_fields >= 1 ) { $number_of_fields = $number_of_fields_best; } } } } return ( $number_of_fields, $number_of_fields_best, $columns ); } ## end sub lp_table_fix sub write_formatted_table { # Write a table of comma separated items with fixed number of fields my ( $self, $number_of_fields, $comma_count, $rcomma_index, $use_separate_first_term ) = @_; write_logfile_entry( "List: auto formatting with $number_of_fields fields/row\n"); my $j_first_break = $use_separate_first_term ? $number_of_fields : $number_of_fields - 1; my $j = $j_first_break; while ( $j < $comma_count ) { my $i_comma = $rcomma_index->[$j]; $self->set_forced_breakpoint($i_comma); $j += $number_of_fields; } return; } ## end sub write_formatted_table } ## end closure set_comma_breakpoint_final sub study_list_complexity { # Look for complex tables which should be formatted with one term per line. # Returns the following: # # \@i_ragged_break_list = list of good breakpoints to avoid lines # which are hard to read # $number_of_fields_best = suggested number of fields based on # complexity; = 0 if any number may be used. # my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_; my $item_count = @{$ri_term_begin}; my $complex_item_count = 0; my $number_of_fields_best = $rOpts_maximum_fields_per_table; my $i_max = @{$ritem_lengths} - 1; ##my @item_complexity; my $i_last_last_break = -3; my $i_last_break = -2; my @i_ragged_break_list; my $definitely_complex = 30; my $definitely_simple = 12; my $quote_count = 0; for my $i ( 0 .. $i_max ) { my $ib = $ri_term_begin->[$i]; my $ie = $ri_term_end->[$i]; # define complexity: start with the actual term length my $weighted_length = ( $ritem_lengths->[$i] - 2 ); ##TBD: join types here and check for variations ##my $str=join "", @tokens_to_go[$ib..$ie]; my $is_quote = 0; if ( $types_to_go[$ib] =~ /^[qQ]$/ ) { $is_quote = 1; $quote_count++; } elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) { $quote_count++; } else { ## ok } if ( $ib eq $ie ) { if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) { $complex_item_count++; $weighted_length *= 2; } else { } } else { if ( first { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) { $complex_item_count++; $weighted_length *= 2; } if ( first { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) { $weighted_length += 4; } } # add weight for extra tokens. $weighted_length += 2 * ( $ie - $ib ); ## my $BUB = join '', @tokens_to_go[$ib..$ie]; ## print "# COMPLEXITY:$weighted_length $BUB\n"; ##push @item_complexity, $weighted_length; # now mark a ragged break after this item it if it is 'long and # complex': if ( $weighted_length >= $definitely_complex ) { # if we broke after the previous term # then break before it too if ( $i_last_break == $i - 1 && $i > 1 && $i_last_last_break != $i - 2 ) { ## TODO: don't strand a small term pop @i_ragged_break_list; push @i_ragged_break_list, $i - 2; push @i_ragged_break_list, $i - 1; } push @i_ragged_break_list, $i; $i_last_last_break = $i_last_break; $i_last_break = $i; } # don't break before a small last term -- it will # not look good on a line by itself. elsif ($i == $i_max && $i_last_break == $i - 1 && $weighted_length <= $definitely_simple ) { pop @i_ragged_break_list; } else { ## ok } } my $identifier_count = $i_max + 1 - $quote_count; # Need more tuning here.. if ( $max_width > 12 && $complex_item_count > $item_count / 2 && $number_of_fields_best != 2 ) { $number_of_fields_best = 1; } return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count ); } ## end sub study_list_complexity sub get_maximum_fields_wanted { # Not all tables look good with more than one field of items. # This routine looks at a table and decides if it should be # formatted with just one field or not. # This coding is still under development. my ($ritem_lengths) = @_; my $number_of_fields_best = 0; # For just a few items, we tentatively assume just 1 field. my $item_count = @{$ritem_lengths}; if ( $item_count <= 5 ) { $number_of_fields_best = 1; } # For larger tables, look at it both ways and see what looks best else { my $is_odd = 1; my @max_length = ( 0, 0 ); my @last_length_2 = ( undef, undef ); my @first_length_2 = ( undef, undef ); my $last_length = undef; my $total_variation_1 = 0; my $total_variation_2 = 0; my @total_variation_2 = ( 0, 0 ); foreach my $j ( 0 .. $item_count - 1 ) { $is_odd = 1 - $is_odd; my $length = $ritem_lengths->[$j]; if ( $length > $max_length[$is_odd] ) { $max_length[$is_odd] = $length; } if ( defined($last_length) ) { my $dl = abs( $length - $last_length ); $total_variation_1 += $dl; } $last_length = $length; my $ll = $last_length_2[$is_odd]; if ( defined($ll) ) { my $dl = abs( $length - $ll ); $total_variation_2[$is_odd] += $dl; } else { $first_length_2[$is_odd] = $length; } $last_length_2[$is_odd] = $length; } $total_variation_2 = $total_variation_2[0] + $total_variation_2[1]; my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0; if ( $total_variation_2 >= $factor * $total_variation_1 ) { $number_of_fields_best = 1; } } return ($number_of_fields_best); } ## end sub get_maximum_fields_wanted sub table_columns_available { my $i_first_comma = shift; my $columns = $maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] - leading_spaces_to_go($i_first_comma); # Patch: the vertical formatter does not line up lines whose lengths # exactly equal the available line length because of allowances # that must be made for side comments. Therefore, the number of # available columns is reduced by 1 character. $columns -= 1; return $columns; } ## end sub table_columns_available sub maximum_number_of_fields { # how many fields will fit in the available space? my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_; my $max_pairs = int( $columns / $pair_width ); my $number_of_fields = $max_pairs * 2; if ( $odd_or_even == 1 && $max_pairs * $pair_width + $max_width <= $columns ) { $number_of_fields++; } return $number_of_fields; } ## end sub maximum_number_of_fields sub compactify_table { # given a table with a certain number of fields and a certain number # of lines, see if reducing the number of fields will make it look # better. my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_; if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) { my $min_fields = $number_of_fields; while ($min_fields >= $odd_or_even && $min_fields * $formatted_lines >= $item_count ) { $number_of_fields = $min_fields; $min_fields -= $odd_or_even; } } return $number_of_fields; } ## end sub compactify_table sub set_ragged_breakpoints { # Set breakpoints in a list that cannot be formatted nicely as a # table. my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_; my $break_count = 0; foreach ( @{$ri_ragged_break_list} ) { my $j = $ri_term_comma->[$_]; if ($j) { $self->set_forced_breakpoint($j); $break_count++; } } return $break_count; } ## end sub set_ragged_breakpoints sub copy_old_breakpoints { my ( $self, $i_first_comma, $i_last_comma ) = @_; # We are formatting a list and have decided to make comma breaks # the same as in the input file. for my $i ( $i_first_comma .. $i_last_comma ) { if ( $old_breakpoint_to_go[$i] ) { # If the comma style is under certain controls, and if this is a # comma breakpoint with the comma is at the beginning of the next # line, then we must pass that index instead. This will allow sub # set_forced_breakpoints to check and follow the user settings. This # produces a uniform style and can prevent instability (b1422). # # The flag '$controlled_comma_style' will be set if the user # entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not # needed or set for the -boc flag. my $ibreak = $i; if ( $types_to_go[$ibreak] ne ',' && $controlled_comma_style ) { my $index = $inext_to_go[$ibreak]; if ( $index > $ibreak && $types_to_go[$index] eq ',' ) { $ibreak = $index; } } $self->set_forced_breakpoint($ibreak); } } return; } ## end sub copy_old_breakpoints sub set_nobreaks { my ( $self, $i, $j ) = @_; if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) { 0 && do { my ( $a, $b, $c ) = caller(); print {*STDOUT} "NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"; }; @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 ); } # shouldn't happen; non-critical error else { if (DEVEL_MODE) { my ( $a, $b, $c ) = caller(); Fault(<<EOM); NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go EOM } } return; } ## end sub set_nobreaks ############################################### # CODE SECTION 12: Code for setting indentation ############################################### sub token_sequence_length { # return length of tokens ($ibeg .. $iend) including $ibeg & $iend my ( $ibeg, $iend ) = @_; # fix possible negative starting index if ( $ibeg < 0 ) { $ibeg = 0 } # returns 0 if index range is empty (some subs assume this) if ( $ibeg > $iend ) { return 0; } return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; } ## end sub token_sequence_length sub total_line_length { # return length of a line of tokens ($ibeg .. $iend) my ( $ibeg, $iend ) = @_; # get the leading spaces on this line ... my $spaces = $leading_spaces_to_go[$ibeg]; if ( ref($spaces) ) { $spaces = $spaces->get_spaces() } # ... then add the net token length return $spaces + $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg]; } ## end sub total_line_length sub excess_line_length { # return number of characters by which a line of tokens ($ibeg..$iend) # exceeds the allowable line length. # NOTE: profiling shows that efficiency of this routine is essential. my ( $self, $ibeg, $iend, $ignore_right_weld ) = @_; # Start with the leading spaces on this line ... my $excess = $leading_spaces_to_go[$ibeg]; if ( ref($excess) ) { $excess = $excess->get_spaces() } # ... and include right weld lengths unless requested not to if ( $total_weld_count && $type_sequence_to_go[$iend] && !$ignore_right_weld ) { my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] }; $excess += $wr if defined($wr); } # ... then add the net token length, minus the maximum length return $excess + $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg] - $maximum_line_length_at_level[ $levels_to_go[$ibeg] ]; } ## end sub excess_line_length sub get_spaces { # return the number of leading spaces associated with an indentation # variable $indentation is either a constant number of spaces or an object # with a get_spaces method. my $indentation = shift; return ref($indentation) ? $indentation->get_spaces() : $indentation; } ## end sub get_spaces sub get_recoverable_spaces { # return the number of spaces (+ means shift right, - means shift left) # that we would like to shift a group of lines with the same indentation # to get them to line up with their opening parens my $indentation = shift; return ref($indentation) ? $indentation->get_recoverable_spaces() : 0; } ## end sub get_recoverable_spaces sub get_available_spaces_to_go { my ( $self, $ii ) = @_; my $item = $leading_spaces_to_go[$ii]; # return the number of available leading spaces associated with an # indentation variable. $indentation is either a constant number of # spaces or an object with a get_available_spaces method. return ref($item) ? $item->get_available_spaces() : 0; } ## end sub get_available_spaces_to_go { ## begin closure set_lp_indentation use constant DEBUG_LP => 0; # Stack of -lp index objects which survives between batches. my $rLP; my $max_lp_stack; # The predicted position of the next opening container which may start # an -lp indentation level. This survives between batches. my $lp_position_predictor; BEGIN { # Index names for the -lp stack variables. # Do not combine with other BEGIN blocks (c101). my $i = 0; use constant { _lp_ci_level_ => $i++, _lp_level_ => $i++, _lp_object_ => $i++, _lp_container_seqno_ => $i++, _lp_space_count_ => $i++, }; } ## end BEGIN sub initialize_lp_vars { # initialize gnu variables for a new file; # must be called once at the start of a new file. $lp_position_predictor = 0; $max_lp_stack = 0; # we can turn off -lp if all levels will be at or above the cutoff if ( $high_stress_level <= 1 ) { $rOpts_line_up_parentheses = 0; $rOpts_extended_line_up_parentheses = 0; } # fix for b1459: -naws adds stress for -xlp if ( $high_stress_level <= 2 && !$rOpts_add_whitespace ) { $rOpts_extended_line_up_parentheses = 0; } $rLP = []; # initialize the leading whitespace stack to negative levels # so that we can never run off the end of the stack $rLP->[$max_lp_stack]->[_lp_ci_level_] = -1; $rLP->[$max_lp_stack]->[_lp_level_] = -1; $rLP->[$max_lp_stack]->[_lp_object_] = undef; $rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT; $rLP->[$max_lp_stack]->[_lp_space_count_] = 0; return; } ## end sub initialize_lp_vars # hashes for efficient testing my %hash_test1; my %hash_test2; my %hash_test3; BEGIN { my @q = qw< } ) ] >; @hash_test1{@q} = (1) x scalar(@q); @q = qw(: ? f); push @q, ','; @hash_test2{@q} = (1) x scalar(@q); @q = qw( . || && ); @hash_test3{@q} = (1) x scalar(@q); } ## end BEGIN # shared variables, re-initialized for each batch my $rlp_object_list; my $max_lp_object_list; my %lp_comma_count; my %lp_arrow_count; my $space_count; my $current_level; my $current_ci_level; my $ii_begin_line; my $in_lp_mode; my $stack_changed; my $K_last_nonblank; my $last_nonblank_token; my $last_nonblank_type; my $last_last_nonblank_type; sub set_lp_indentation { my ($self) = @_; #------------------------------------------------------------------ # Define the leading whitespace for all tokens in the current batch # when the -lp formatting is selected. #------------------------------------------------------------------ # Returns number of tokens in this batch which have leading spaces # defined by an lp object: my $lp_object_count_this_batch = 0; # Safety check, should not be needed: if ( !$rOpts_line_up_parentheses || !defined($max_index_to_go) || $max_index_to_go < 0 ) { return $lp_object_count_this_batch; } # List of -lp indentation objects created in this batch $rlp_object_list = []; $max_lp_object_list = -1; %lp_comma_count = (); %lp_arrow_count = (); $space_count = undef; $current_level = undef; $current_ci_level = undef; $ii_begin_line = 0; $in_lp_mode = 0; $stack_changed = 1; $K_last_nonblank = undef; $last_nonblank_token = EMPTY_STRING; $last_nonblank_type = EMPTY_STRING; $last_last_nonblank_type = EMPTY_STRING; my %last_lp_equals = (); my $rLL = $self->[_rLL_]; my $starting_in_quote = $self->[_this_batch_]->[_starting_in_quote_]; my $imin = 0; # The 'starting_in_quote' flag means that the first token is the first # token of a line and it is also the continuation of some kind of # multi-line quote or pattern. It must have no added leading # whitespace, so we can skip it. if ($starting_in_quote) { $imin += 1; } my $Kpnb = $K_to_go[0] - 1; if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) { $Kpnb -= 1; } if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { $K_last_nonblank = $Kpnb; } if ( defined($K_last_nonblank) ) { $last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_]; $last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_]; } #----------------------------------- # Loop over all tokens in this batch #----------------------------------- foreach my $ii ( $imin .. $max_index_to_go ) { my $type = $types_to_go[$ii]; my $token = $tokens_to_go[$ii]; my $level = $levels_to_go[$ii]; my $ci_level = $ci_levels_to_go[$ii]; my $total_depth = $nesting_depth_to_go[$ii]; # get the top state from the stack if it has changed if ($stack_changed) { my $rLP_top = $rLP->[$max_lp_stack]; my $lp_object = $rLP_top->[_lp_object_]; if ($lp_object) { ( $space_count, $current_level, $current_ci_level ) = @{ $lp_object->get_spaces_level_ci() }; } else { $current_ci_level = $rLP_top->[_lp_ci_level_]; $current_level = $rLP_top->[_lp_level_]; $space_count = $rLP_top->[_lp_space_count_]; } $stack_changed = 0; } #------------------------------------------------------------ # Break at a previous '=' if necessary to control line length #------------------------------------------------------------ if ( $type eq '{' || $type eq '(' ) { $lp_comma_count{ $total_depth + 1 } = 0; $lp_arrow_count{ $total_depth + 1 } = 0; # If we come to an opening token after an '=' token of some # type, see if it would be helpful to 'break' after the '=' to # save space my $ii_last_equals = $last_lp_equals{$total_depth}; if ($ii_last_equals) { $self->lp_equals_break_check( $ii, $ii_last_equals ); } } #------------------------ # Handle decreasing depth #------------------------ # Note that one token may have both decreasing and then increasing # depth. For example, (level, ci) can go from (1,1) to (2,0). So, # in this example we would first go back to (1,0) then up to (2,0) # in a single call. if ( $level < $current_level || $ci_level < $current_ci_level ) { $self->lp_decreasing_depth($ii); } #------------------------ # handle increasing depth #------------------------ if ( $level > $current_level || $ci_level > $current_ci_level ) { $self->lp_increasing_depth($ii); } #------------------ # Handle all tokens #------------------ if ( $type ne 'b' ) { # Count commas and look for non-list characters. Once we see a # non-list character, we give up and don't look for any more # commas. if ( $type eq '=>' ) { $lp_arrow_count{$total_depth}++; # remember '=>' like '=' for estimating breaks (but see # above note for b1035) $last_lp_equals{$total_depth} = $ii; } elsif ( $type eq ',' ) { $lp_comma_count{$total_depth}++; } elsif ( $is_assignment{$type} ) { $last_lp_equals{$total_depth} = $ii; } else { ## not a special type } # this token might start a new line if .. if ( $ii > $ii_begin_line && ( # this is the first nonblank token of the line $ii == 1 && $types_to_go[0] eq 'b' # or previous character was one of these: # /^([\:\?\,f])$/ || $hash_test2{$last_nonblank_type} # or previous character was opening and this is not # closing || ( $last_nonblank_type eq '{' && $type ne '}' ) || ( $last_nonblank_type eq '(' and $type ne ')' ) # or this token is one of these: # /^([\.]|\|\||\&\&)$/ || $hash_test3{$type} # or this is a closing structure || ( $last_nonblank_type eq '}' && $last_nonblank_token eq $last_nonblank_type ) # or previous token was keyword 'return' || ( $last_nonblank_type eq 'k' && ( $last_nonblank_token eq 'return' && $type ne '{' ) ) # or starting a new line at certain keywords is fine || ( $type eq 'k' && $is_if_unless_and_or_last_next_redo_return{ $token} ) # or this is after an assignment after a closing # structure || ( $is_assignment{$last_nonblank_type} && ( # /^[\}\)\]]$/ $hash_test1{$last_last_nonblank_type} # and it is significantly to the right || $lp_position_predictor > ( $maximum_line_length_at_level[$level] - $rOpts_maximum_line_length / 2 ) ) ) ) ) { check_for_long_gnu_style_lines($ii); $ii_begin_line = $ii; # back up 1 token if we want to break before that type # otherwise, we may strand tokens like '?' or ':' on a line if ( $ii_begin_line > 0 ) { my $wbb = $last_nonblank_type eq 'k' ? $want_break_before{$last_nonblank_token} : $want_break_before{$last_nonblank_type}; $ii_begin_line-- if ($wbb); } } $K_last_nonblank = $K_to_go[$ii]; $last_last_nonblank_type = $last_nonblank_type; $last_nonblank_type = $type; $last_nonblank_token = $token; } ## end if ( $type ne 'b' ) # remember the predicted position of this token on the output line if ( $ii > $ii_begin_line ) { ## NOTE: this is a critical loop - the following call has been ## expanded for about 2x speedup: ## $lp_position_predictor = ## total_line_length( $ii_begin_line, $ii ); my $indentation = $leading_spaces_to_go[$ii_begin_line]; if ( ref($indentation) ) { $indentation = $indentation->get_spaces(); } $lp_position_predictor = $indentation + $summed_lengths_to_go[ $ii + 1 ] - $summed_lengths_to_go[$ii_begin_line]; } else { $lp_position_predictor = $space_count + $token_lengths_to_go[$ii]; } # Store the indentation object for this token. # This allows us to manipulate the leading whitespace # (in case we have to reduce indentation to fit a line) without # having to change any token values. #--------------------------------------------------------------- # replace leading whitespace with indentation objects where used #--------------------------------------------------------------- if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { $lp_object_count_this_batch++; my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; $leading_spaces_to_go[$ii] = $lp_object; if ( $max_lp_stack > 0 && $ci_level && $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] ) { $reduced_spaces_to_go[$ii] = $rLP->[ $max_lp_stack - 1 ]->[_lp_object_]; } else { $reduced_spaces_to_go[$ii] = $lp_object; } } } ## end loop over all tokens in this batch undo_incomplete_lp_indentation() if ( !$rOpts_extended_line_up_parentheses ); return $lp_object_count_this_batch; } ## end sub set_lp_indentation sub lp_equals_break_check { my ( $self, $ii, $ii_last_equals ) = @_; # If we come to an opening token after an '=' token of some # type, see if it would be helpful to 'break' after the '=' to # save space. # Given: # $ii = index of an opening token in the output batch # $ii_begin_line = index of token starting next output line # Update: # $lp_position_predictor - updated position predictor # $ii_begin_line = updated starting token index # Skip an empty set of parens, such as after channel(): # my $exchange = $self->_channel()->exchange( # This fixes issues b1318 b1322 b1323 b1328 my $is_empty_container; if ( $ii_last_equals && $ii < $max_index_to_go ) { my $seqno = $type_sequence_to_go[$ii]; my $inext_nb = $ii + 1; $inext_nb++ if ( $types_to_go[$inext_nb] eq 'b' ); my $seqno_nb = $type_sequence_to_go[$inext_nb]; $is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno; } if ( $ii_last_equals && $ii_last_equals > $ii_begin_line && !$is_empty_container ) { my $seqno = $type_sequence_to_go[$ii]; # find the position if we break at the '=' my $i_test = $ii_last_equals; # Fix for issue b1229, check if want break before this token # Fix for issue b1356, if i_test is a blank, the leading spaces may # be incorrect (if it was an interline blank). # Fix for issue b1357 .. b1370, i_test must be prev nonblank # ( the ci value for blanks can vary ) # See also case b223 # Fix for issue b1371-b1374 : all of these and the above are fixed # by simply backing up one index and setting the leading spaces of # a blank equal to that of the equals. if ( $want_break_before{ $types_to_go[$i_test] } ) { $i_test -= 1; $leading_spaces_to_go[$i_test] = $leading_spaces_to_go[$ii_last_equals] if ( $types_to_go[$i_test] eq 'b' ); } elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } else { ## ok } my $test_position = total_line_length( $i_test, $ii ); my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ]; #------------------------------------------------------ # Break if structure will reach the maximum line length #------------------------------------------------------ # Historically, -lp just used one-half line length here my $len_increase = $rOpts_maximum_line_length / 2; # For -xlp, we can also use the pre-computed lengths my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno}; if ( $min_len && $min_len > $len_increase ) { $len_increase = $min_len; } if ( # if we might exceed the maximum line length $lp_position_predictor + $len_increase > $mll # if a -bbx flag WANTS a break before this opening token || ( $seqno && $self->[_rbreak_before_container_by_seqno_]->{$seqno} ) # or we are beyond the 1/4 point and there was an old # break at an assignment (not '=>') [fix for b1035] || ( $lp_position_predictor > $mll - $rOpts_maximum_line_length * 3 / 4 && $types_to_go[$ii_last_equals] ne '=>' && ( $old_breakpoint_to_go[$ii_last_equals] || ( $ii_last_equals > 0 && $old_breakpoint_to_go[ $ii_last_equals - 1 ] ) || ( $ii_last_equals > 1 && $types_to_go[ $ii_last_equals - 1 ] eq 'b' && $old_breakpoint_to_go[ $ii_last_equals - 2 ] ) ) ) ) { # then make the switch -- note that we do not set a # real breakpoint here because we may not really need # one; sub break_lists will do that if necessary. my $Kc = $self->[_K_closing_container_]->{$seqno}; if ( # For -lp, only if the closing token is in this # batch (c117). Otherwise it cannot be done by sub # break_lists. defined($Kc) && $Kc <= $K_to_go[$max_index_to_go] # For -xlp, we only need one nonblank token after # the opening token. || $rOpts_extended_line_up_parentheses ) { $ii_begin_line = $i_test + 1; $lp_position_predictor = $test_position; #-------------------------------------------------- # Fix for an opening container terminating a batch: #-------------------------------------------------- # To get alignment of a -lp container with its # contents, we have to put a break after $i_test. # For $ii<$max_index_to_go, this will be done by # sub break_lists based on the indentation object. # But for $ii=$max_index_to_go, the indentation # object for this seqno will not be created until # the next batch, so we have to set a break at # $i_test right now in order to get one. if ( $ii == $max_index_to_go && !$block_type_to_go[$ii] && $types_to_go[$ii] eq '{' && $seqno && !$self->[_ris_excluded_lp_container_]->{$seqno} ) { $self->set_forced_lp_break( $ii_begin_line, $ii ); } } } } return; } ## end sub lp_equals_break_check sub lp_decreasing_depth { my ( $self, $ii ) = @_; my $rLL = $self->[_rLL_]; my $level = $levels_to_go[$ii]; my $ci_level = $ci_levels_to_go[$ii]; # loop to find the first entry at or completely below this level while (1) { # Be sure we have not hit the stack bottom - should never # happen because only negative levels can get here, and # $level was forced to be positive above. if ( !$max_lp_stack ) { # non-fatal, just keep going except in DEVEL_MODE if (DEVEL_MODE) { Fault(<<EOM); program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp EOM } last; } # save index of token which closes this level if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_]; $lp_object->set_closed($ii); my $comma_count = 0; my $arrow_count = 0; my $type = $types_to_go[$ii]; if ( $type eq '}' || $type eq ')' ) { my $total_depth = $nesting_depth_to_go[$ii]; $comma_count = $lp_comma_count{$total_depth}; $arrow_count = $lp_arrow_count{$total_depth}; $comma_count = 0 unless $comma_count; $arrow_count = 0 unless $arrow_count; } $lp_object->set_comma_count($comma_count); $lp_object->set_arrow_count($arrow_count); # Undo any extra indentation if we saw no commas my $available_spaces = $lp_object->get_available_spaces(); my $K_start = $lp_object->get_K_begin_line(); if ( $available_spaces > 0 && $K_start >= $K_to_go[0] && ( $comma_count <= 0 || $arrow_count > 0 ) ) { my $i = $lp_object->get_lp_item_index(); # Safety check for a valid stack index. It # should be ok because we just checked that the # index K of the token associated with this # indentation is in this batch. if ( $i < 0 || $i > $max_lp_object_list ) { my $KK = $K_to_go[$ii]; my $lno = $rLL->[$KK]->[_LINE_INDEX_]; DEVEL_MODE && Fault(<<EOM); Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list EOM last; } if ( $arrow_count == 0 ) { $rlp_object_list->[$i] ->permanently_decrease_available_spaces( $available_spaces); } else { $rlp_object_list->[$i] ->tentatively_decrease_available_spaces( $available_spaces); } foreach my $j ( $i + 1 .. $max_lp_object_list ) { $rlp_object_list->[$j] ->decrease_SPACES($available_spaces); } } } # go down one level --$max_lp_stack; my $rLP_top = $rLP->[$max_lp_stack]; my $ci_lev = $rLP_top->[_lp_ci_level_]; my $lev = $rLP_top->[_lp_level_]; my $spaces = $rLP_top->[_lp_space_count_]; if ( $rLP_top->[_lp_object_] ) { my $lp_obj = $rLP_top->[_lp_object_]; ( $spaces, $lev, $ci_lev ) = @{ $lp_obj->get_spaces_level_ci() }; } # stop when we reach a level at or below the current # level if ( $lev <= $level && $ci_lev <= $ci_level ) { $space_count = $spaces; $current_level = $lev; $current_ci_level = $ci_lev; last; } } return; } ## end sub lp_decreasing_depth sub lp_increasing_depth { my ( $self, $ii ) = @_; my $rLL = $self->[_rLL_]; my $type = $types_to_go[$ii]; my $level = $levels_to_go[$ii]; my $ci_level = $ci_levels_to_go[$ii]; $stack_changed = 1; # Compute the standard incremental whitespace. This will be # the minimum incremental whitespace that will be used. This # choice results in a smooth transition between the gnu-style # and the standard style. my $standard_increment = ( $level - $current_level ) * $rOpts_indent_columns + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; # Now we have to define how much extra incremental space # ("$available_space") we want. This extra space will be # reduced as necessary when long lines are encountered or when # it becomes clear that we do not have a good list. my $available_spaces = 0; my $align_seqno = 0; my $K_extra_space; my $last_nonblank_seqno; my $last_nonblank_block_type; if ( defined($K_last_nonblank) ) { $last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_]; $last_nonblank_block_type = $last_nonblank_seqno ? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno} : undef; } $in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_]; #----------------------------------------------- # Initialize indentation spaces on empty stack.. #----------------------------------------------- if ( $max_lp_stack == 0 ) { $space_count = $level * $rOpts_indent_columns; } #---------------------------------------- # Add the standard space increment if ... #---------------------------------------- elsif ( # if this is a BLOCK, add the standard increment $last_nonblank_block_type # or if this is not a sequenced item || !$last_nonblank_seqno # or this container is excluded by user rules # or contains here-docs or multiline qw text || defined($last_nonblank_seqno) && $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno} # or if last nonblank token was not structural indentation || $last_nonblank_type ne '{' # and do not start -lp under stress .. fixes b1244, b1255 || !$in_lp_mode && $level >= $high_stress_level ) { # If we have entered lp mode, use the top lp object to get # the current indentation spaces because it may have # changed. Fixes b1285, b1286. if ($in_lp_mode) { $space_count = $in_lp_mode->get_spaces(); } $space_count += $standard_increment; } #--------------------------------------------------------------- # -lp mode: try to use space to the first non-blank level change #--------------------------------------------------------------- else { # see how much space we have available my $test_space_count = $lp_position_predictor; my $excess = 0; my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno}; my $next_opening_too_far; if ( defined($min_len) ) { $excess = $test_space_count + $min_len - $maximum_line_length_at_level[$level]; if ( $excess > 0 ) { $test_space_count -= $excess; # will the next opening token be a long way out? $next_opening_too_far = $lp_position_predictor + $excess > $maximum_line_length_at_level[$level]; } } my $rLP_top = $rLP->[$max_lp_stack]; my $min_gnu_indentation = $rLP_top->[_lp_space_count_]; if ( $rLP_top->[_lp_object_] ) { $min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces(); } $available_spaces = $test_space_count - $min_gnu_indentation; # Do not startup -lp indentation mode if no space ... # ... or if it puts the opening far to the right if ( !$in_lp_mode && ( $available_spaces <= 0 || $next_opening_too_far ) ) { $space_count += $standard_increment; $available_spaces = 0; } # Use -lp mode else { $space_count = $test_space_count; $in_lp_mode = 1; if ( $available_spaces >= $standard_increment ) { $min_gnu_indentation += $standard_increment; } elsif ( $available_spaces > 1 ) { $min_gnu_indentation += $available_spaces + 1; # The "+1" space can cause mis-alignment if there is no # blank space between the opening paren and the next # nonblank token (i.e., -pt=2) and the container does not # get broken open. So we will mark this token for later # space removal by sub 'xlp_tweak' if this container # remains intact (issue git #106). if ( $type ne 'b' # Skip if the maximum line length is exceeded here && $excess <= 0 # This is only for level changes, not ci level changes. # But note: this test is here out of caution but I have # not found a case where it is actually necessary. && $is_opening_token{$last_nonblank_token} # Be sure we are at consecutive nonblanks. This test # should be true, but it guards against future coding # changes to level values assigned to blank spaces. && $ii > 0 && $types_to_go[ $ii - 1 ] ne 'b' ) { $K_extra_space = $K_to_go[$ii]; } } elsif ( $is_opening_token{$last_nonblank_token} ) { if ( ( $tightness{$last_nonblank_token} < 2 ) ) { $min_gnu_indentation += 2; } else { $min_gnu_indentation += 1; } } else { $min_gnu_indentation += $standard_increment; } $available_spaces = $space_count - $min_gnu_indentation; if ( $available_spaces < 0 ) { $space_count = $min_gnu_indentation; $available_spaces = 0; } $align_seqno = $last_nonblank_seqno; } } #------------------------------------------- # update the state, but not on a blank token #------------------------------------------- if ( $type ne 'b' ) { if ( $rLP->[$max_lp_stack]->[_lp_object_] ) { $rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1); $in_lp_mode = 1; } #---------------------------------------- # Create indentation object if in lp-mode #---------------------------------------- ++$max_lp_stack; my $lp_object; if ($in_lp_mode) { # A negative level implies not to store the item in the # item_list my $lp_item_index = 0; if ( $level >= 0 ) { $lp_item_index = ++$max_lp_object_list; } my $K_begin_line = 0; if ( $ii_begin_line >= 0 && $ii_begin_line <= $max_index_to_go ) { $K_begin_line = $K_to_go[$ii_begin_line]; } # Minor Fix: when creating indentation at a side # comment we don't know what the space to the actual # next code token will be. We will allow a space for # sub correct_lp to move it in if necessary. if ( $type eq '#' && $max_index_to_go > 0 && $align_seqno ) { $available_spaces += 1; } my $standard_spaces = $leading_spaces_to_go[$ii]; $lp_object = Perl::Tidy::IndentationItem->new( spaces => $space_count, level => $level, ci_level => $ci_level, available_spaces => $available_spaces, lp_item_index => $lp_item_index, align_seqno => $align_seqno, K_begin_line => $K_begin_line, standard_spaces => $standard_spaces, K_extra_space => $K_extra_space, ); DEBUG_LP && do { my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_]; my $token = $tokens_to_go[$ii]; print {*STDOUT} <<EOM; DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor EOM }; if ( $level >= 0 ) { $rlp_object_list->[$max_lp_object_list] = $lp_object; } if ( $is_opening_token{$last_nonblank_token} && $last_nonblank_seqno ) { $self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} = $lp_object; } } #------------------------------------ # Store this indentation on the stack #------------------------------------ $rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level; $rLP->[$max_lp_stack]->[_lp_level_] = $level; $rLP->[$max_lp_stack]->[_lp_object_] = $lp_object; $rLP->[$max_lp_stack]->[_lp_container_seqno_] = $last_nonblank_seqno; $rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count; # If the opening paren is beyond the half-line length, then # we will use the minimum (standard) indentation. This will # help avoid problems associated with running out of space # near the end of a line. As a result, in deeply nested # lists, there will be some indentations which are limited # to this minimum standard indentation. But the most deeply # nested container will still probably be able to shift its # parameters to the right for proper alignment, so in most # cases this will not be noticeable. if ( $available_spaces > 0 && $lp_object ) { my $halfway = $maximum_line_length_at_level[$level] - $rOpts_maximum_line_length / 2; $lp_object->tentatively_decrease_available_spaces( $available_spaces) if ( $space_count > $halfway ); } } return; } ## end sub lp_increasing_depth sub check_for_long_gnu_style_lines { # look at the current estimated maximum line length, and # remove some whitespace if it exceeds the desired maximum my ($ii_to_go) = @_; # nothing can be done if no stack items defined for this line return if ( $max_lp_object_list < 0 ); # See if we have exceeded the maximum desired line length .. # keep 2 extra free because they are needed in some cases # (result of trial-and-error testing) my $tol = 2; # But reduce tol to 0 at a terminal comma; fixes b1432 if ( $tokens_to_go[$ii_to_go] eq ',' && $ii_to_go < $max_index_to_go ) { my $in = $ii_to_go + 1; if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ } if ( $is_closing_token{ $tokens_to_go[$in] } ) { $tol = 0; } } my $spaces_needed = $lp_position_predictor - $maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] + $tol; return if ( $spaces_needed <= 0 ); # We are over the limit, so try to remove a requested number of # spaces from leading whitespace. We are only allowed to remove # from whitespace items created on this batch, since others have # already been used and cannot be undone. my @candidates = (); # loop over all whitespace items created for the current batch foreach my $i ( 0 .. $max_lp_object_list ) { my $item = $rlp_object_list->[$i]; # item must still be open to be a candidate (otherwise it # cannot influence the current token) next if ( $item->get_closed() >= 0 ); my $available_spaces = $item->get_available_spaces(); if ( $available_spaces > 0 ) { push( @candidates, [ $i, $available_spaces ] ); } } return unless (@candidates); # sort by available whitespace so that we can remove whitespace # from the maximum available first. @candidates = sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates; # keep removing whitespace until we are done or have no more foreach my $candidate (@candidates) { my ( $i, $available_spaces ) = @{$candidate}; my $deleted_spaces = ( $available_spaces > $spaces_needed ) ? $spaces_needed : $available_spaces; # remove the incremental space from this item $rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces); my $i_debug = $i; # update the leading whitespace of this item and all items # that came after it $i -= 1; while ( ++$i <= $max_lp_object_list ) { my $old_spaces = $rlp_object_list->[$i]->get_spaces(); if ( $old_spaces >= $deleted_spaces ) { $rlp_object_list->[$i]->decrease_SPACES($deleted_spaces); } # shouldn't happen except for code bug: else { # non-fatal, keep going except in DEVEL_MODE if (DEVEL_MODE) { my $level = $rlp_object_list->[$i_debug]->get_level(); my $ci_level = $rlp_object_list->[$i_debug]->get_ci_level(); my $old_level = $rlp_object_list->[$i]->get_level(); my $old_ci_level = $rlp_object_list->[$i]->get_ci_level(); Fault(<<EOM); program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level EOM } } } $lp_position_predictor -= $deleted_spaces; $spaces_needed -= $deleted_spaces; last if ( $spaces_needed <= 0 ); } return; } ## end sub check_for_long_gnu_style_lines sub undo_incomplete_lp_indentation { #------------------------------------------------------------------ # Undo indentation for all incomplete -lp indentation levels of the # current batch unless -xlp is set. #------------------------------------------------------------------ # This routine is called once after each output stream batch is # finished to undo indentation for all incomplete -lp indentation # levels. If this routine is called then comments and blank lines will # disrupt this indentation style. In older versions of perltidy this # was always done because it could cause problems otherwise, but recent # improvements allow fairly good results to be obtained by skipping # this step with the -xlp flag. # nothing to do if no stack items defined for this line return if ( $max_lp_object_list < 0 ); # loop over all whitespace items created for the current batch foreach my $i ( 0 .. $max_lp_object_list ) { my $item = $rlp_object_list->[$i]; # only look for open items next if ( $item->get_closed() >= 0 ); # Tentatively remove all of the available space # (The vertical aligner will try to get it back later) my $available_spaces = $item->get_available_spaces(); if ( $available_spaces > 0 ) { # delete incremental space for this item $rlp_object_list->[$i] ->tentatively_decrease_available_spaces($available_spaces); # Reduce the total indentation space of any nodes that follow # Note that any such nodes must necessarily be dependents # of this node. foreach ( $i + 1 .. $max_lp_object_list ) { $rlp_object_list->[$_]->decrease_SPACES($available_spaces); } } } return; } ## end sub undo_incomplete_lp_indentation } ## end closure set_lp_indentation #---------------------------------------------------------------------- # sub to set a requested break before an opening container in -lp mode. #---------------------------------------------------------------------- sub set_forced_lp_break { my ( $self, $i_begin_line, $i_opening ) = @_; # Given: # $i_begin_line = index of break in the _to_go arrays # $i_opening = index of the opening container # Set any requested break at a token before this opening container # token. This is often an '=' or '=>' but can also be things like # '.', ',', 'return'. It was defined by sub set_lp_indentation. # Important: # For intact containers, call this at the closing token. # For broken containers, call this at the opening token. # This will avoid needless breaks when it turns out that the # container does not actually get broken. This isn't known until # the closing container for intact blocks. return if ( $i_begin_line < 0 || $i_begin_line > $max_index_to_go ); # Handle request to put a break break immediately before this token. # We may not want to do that since we are also breaking after it. if ( $i_begin_line == $i_opening ) { # The following rules should be reviewed. We may want to always # allow the break. If we do not do the break, the indentation # may be off. # RULE: don't break before it unless it is welded to a qw. # This works well, but we may want to relax this to allow # breaks in additional cases. return if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } ); return unless ( $types_to_go[$max_index_to_go] eq 'q' ); } # Only break for breakpoints at the same # indentation level as the opening paren my $test1 = $nesting_depth_to_go[$i_opening]; my $test2 = $nesting_depth_to_go[$i_begin_line]; return if ( $test2 != $test1 ); # Back up at a blank (fixes case b932) my $ibr = $i_begin_line - 1; if ( $ibr > 0 && $types_to_go[$ibr] eq 'b' ) { $ibr--; } if ( $ibr >= 0 ) { my $i_nonblank = $self->set_forced_breakpoint($ibr); # Crude patch to prevent sub recombine_breakpoints from undoing # this break, especially after an '='. It will leave old # breakpoints alone. See c098/x045 for some examples. if ( defined($i_nonblank) ) { $old_breakpoint_to_go[$i_nonblank] = 1; } } return; } ## end sub set_forced_lp_break sub reduce_lp_indentation { # reduce the leading whitespace at token $i if possible by $spaces_needed # (a large value of $spaces_needed will remove all excess space) # NOTE: to be called from break_lists only for a sequence of tokens # contained between opening and closing parens/braces/brackets my ( $self, $i, $spaces_wanted ) = @_; my $deleted_spaces = 0; my $item = $leading_spaces_to_go[$i]; my $available_spaces = $item->get_available_spaces(); if ( $available_spaces > 0 && ( ( $spaces_wanted <= $available_spaces ) || !$item->get_have_child() ) ) { # we'll remove these spaces, but mark them as recoverable $deleted_spaces = $item->tentatively_decrease_available_spaces($spaces_wanted); } return $deleted_spaces; } ## end sub reduce_lp_indentation ########################################################### # CODE SECTION 13: Preparing batches for vertical alignment ########################################################### sub check_convey_batch_input { # Check for valid input to sub convey_batch_to_vertical_aligner. An # error here would most likely be due to an error in the calling # routine 'sub grind_batch_of_CODE'. my ( $self, $ri_first, $ri_last ) = @_; if ( !defined($ri_first) || !defined($ri_last) ) { Fault(<<EOM); Undefined line ranges ri_first and/r ri_last EOM } my $nmax = @{$ri_first} - 1; my $nmax_check = @{$ri_last} - 1; if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) { Fault(<<EOM); Line range index error: nmax=$nmax but nmax_check=$nmax_check These should be equal and >=0 EOM } my ( $ibeg, $iend ); foreach my $n ( 0 .. $nmax ) { my $ibeg_m = $ibeg; my $iend_m = $iend; $ibeg = $ri_first->[$n]; $iend = $ri_last->[$n]; if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) { Fault(<<EOM); Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend These should have iend >= ibeg and be in the range (0..$max_index_to_go) EOM } next if ( $n == 0 ); if ( $ibeg <= $iend_m ) { Fault(<<EOM); Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n EOM } } return; } ## end sub check_convey_batch_input sub convey_batch_to_vertical_aligner { my ($self) = @_; # This routine receives a batch of code for which the final line breaks # have been defined. Here we prepare the lines for passing to the vertical # aligner. We do the following tasks: # - mark certain vertical alignment tokens, such as '=', in each line # - make final indentation adjustments # - do logical padding: insert extra blank spaces to help display certain # logical constructions # - send the line to the vertical aligner my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; my $ris_list_by_seqno = $self->[_ris_list_by_seqno_]; my $this_batch = $self->[_this_batch_]; my $do_not_pad = $this_batch->[_do_not_pad_]; my $starting_in_quote = $this_batch->[_starting_in_quote_]; my $ending_in_quote = $this_batch->[_ending_in_quote_]; my $is_static_block_comment = $this_batch->[_is_static_block_comment_]; my $batch_CODE_type = $this_batch->[_batch_CODE_type_]; my $ri_first = $this_batch->[_ri_first_]; my $ri_last = $this_batch->[_ri_last_]; $self->check_convey_batch_input( $ri_first, $ri_last ) if (DEVEL_MODE); my $n_last_line = @{$ri_first} - 1; my $ibeg_next = $ri_first->[0]; my $iend_next = $ri_last->[0]; my $type_beg_next = $types_to_go[$ibeg_next]; my $type_end_next = $types_to_go[$iend_next]; my $token_beg_next = $tokens_to_go[$ibeg_next]; my $rindentation_list = [0]; # ref to indentations for each line my ( $cscw_block_comment, $closing_side_comment, $is_block_comment ); if ( !$max_index_to_go && $type_beg_next eq '#' ) { $is_block_comment = 1; } if ($rOpts_closing_side_comments) { ( $closing_side_comment, $cscw_block_comment ) = $self->add_closing_side_comment( $ri_first, $ri_last ); } if ( $n_last_line > 0 || $rOpts_extended_continuation_indentation ) { $self->undo_ci( $ri_first, $ri_last, $this_batch->[_rix_seqno_controlling_ci_] ); } # for multi-line batches ... if ( $n_last_line > 0 ) { # flush before a long if statement to avoid unwanted alignment $self->flush_vertical_aligner() if ( $type_beg_next eq 'k' && $is_if_unless{$token_beg_next} ); $self->set_logical_padding( $ri_first, $ri_last, $starting_in_quote ) if ($rOpts_logical_padding); $self->xlp_tweak( $ri_first, $ri_last ) if ($rOpts_extended_line_up_parentheses); } if (DEVEL_MODE) { $self->check_batch_summed_lengths() } # ---------------------------------------------------------- # define the vertical alignments for all lines of this batch # ---------------------------------------------------------- my $rline_alignments; if ( !$max_index_to_go ) { # Optional shortcut for single token ... # = [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ]; $rline_alignments = [ [ [], [ $tokens_to_go[0] ], [ $types_to_go[0] ], [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ], ] ]; } else { $rline_alignments = $self->make_vertical_alignments( $ri_first, $ri_last ); } # ---------------------------------------------- # loop to send each line to the vertical aligner # ---------------------------------------------- my ( $type_beg, $type_end, $token_beg, $ljump ); for my $n ( 0 .. $n_last_line ) { # ---------------------------------------------------------------- # This hash will hold the args for vertical alignment of this line # We will populate it as we go. # ---------------------------------------------------------------- my $rvao_args = {}; my $type_beg_last = $type_beg; my $type_end_last = $type_end; my $ibeg = $ibeg_next; my $iend = $iend_next; my $Kbeg = $K_to_go[$ibeg]; my $Kend = $K_to_go[$iend]; $type_beg = $type_beg_next; $type_end = $type_end_next; $token_beg = $token_beg_next; # --------------------------------------------------- # Define the check value 'Kend' to send for this line # --------------------------------------------------- # The 'Kend' value is an integer for checking that lines come out of # the far end of the pipeline in the right order. It increases # linearly along the token stream. But we only send ending K values of # non-comments down the pipeline. This is equivalent to checking that # the last CODE_type is blank or equal to 'VER'. See also sub # resync_lines_and_tokens for related coding. Note that # '$batch_CODE_type' is the code type of the line to which the ending # token belongs. my $Kend_code = $batch_CODE_type && $batch_CODE_type ne 'VER' ? undef : $Kend; # Get some vars on line [n+1], if any, # and define $ljump = level jump needed by 'sub get_final_indentation' if ( $n < $n_last_line ) { $ibeg_next = $ri_first->[ $n + 1 ]; $iend_next = $ri_last->[ $n + 1 ]; $type_beg_next = $types_to_go[$ibeg_next]; $type_end_next = $types_to_go[$iend_next]; $token_beg_next = $tokens_to_go[$ibeg_next]; my $Kbeg_next = $K_to_go[$ibeg_next]; $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_]; } elsif ( !$is_block_comment && $Kend < $Klimit ) { # Patch for git #51, a bare closing qw paren was not outdented # if the flag '-nodelete-old-newlines is set # Note that we are just looking ahead for the next nonblank # character. We could scan past an arbitrary number of block # comments or hanging side comments by calling K_next_code, but it # could add significant run time with very little to be gained. my $Kbeg_next = $Kend + 1; if ( $Kbeg_next < $Klimit && $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' ) { $Kbeg_next += 1; } $ljump = $rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_]; } else { $ljump = 0; } # --------------------------------------------- # get the vertical alignment info for this line # --------------------------------------------- # The lines are broken into fields which can be spaced by the vertical # to achieve vertical alignment. These fields are the actual text # which will be output, so from here on no more changes can be made to # the text. my $rline_alignment = $rline_alignments->[$n]; my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @{$rline_alignment}; # Programming check: (shouldn't happen) # The number of tokens which separate the fields must always be # one less than the number of fields. If this is not true then # an error has been introduced in sub make_alignment_patterns. if (DEVEL_MODE) { if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) { my $nt = @{$rtokens}; my $nf = @{$rfields}; my $msg = <<EOM; Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns': The number of tokens = $nt should be one less than number of fields: $nf EOM Fault($msg); } } # -------------------------------------- # get the final indentation of this line # -------------------------------------- my ( $indentation, $lev, $level_end, $i_terminal, $is_outdented_line, ) = $self->get_final_indentation( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, $rindentation_list, $ljump, $starting_in_quote, $is_static_block_comment, ); # -------------------------------- # define flag 'outdent_long_lines' # -------------------------------- if ( # we will allow outdenting of long lines.. # which are long quotes, if allowed ( $type_beg eq 'Q' && $rOpts_outdent_long_quotes ) # which are long block comments, if allowed || ( $type_beg eq '#' && $rOpts_outdent_long_comments # but not if this is a static block comment && !$is_static_block_comment ) ) { $rvao_args->{outdent_long_lines} = 1; # convert -lp indentation objects to spaces to allow outdenting if ( ref($indentation) ) { $indentation = $indentation->get_spaces(); } } # -------------------------------------------------- # define flags 'break_alignment_before' and '_after' # -------------------------------------------------- # These flags tell the vertical aligner to stop alignment before or # after this line. if ($is_outdented_line) { $rvao_args->{break_alignment_before} = 1; $rvao_args->{break_alignment_after} = 1; } elsif ($do_not_pad) { $rvao_args->{break_alignment_before} = 1; } # flush at an 'if' which follows a line with (1) terminal semicolon # or (2) terminal block_type which is not an 'if'. This prevents # unwanted alignment between the lines. elsif ( $type_beg eq 'k' && $token_beg eq 'if' ) { my $type_m = 'b'; my $block_type_m; if ( $Kbeg > 0 ) { my $Km = $Kbeg - 1; $type_m = $rLL->[$Km]->[_TYPE_]; if ( $type_m eq 'b' && $Km > 0 ) { $Km -= 1; $type_m = $rLL->[$Km]->[_TYPE_]; } if ( $type_m eq '#' && $Km > 0 ) { $Km -= 1; $type_m = $rLL->[$Km]->[_TYPE_]; if ( $type_m eq 'b' && $Km > 0 ) { $Km -= 1; $type_m = $rLL->[$Km]->[_TYPE_]; } } my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_]; if ($seqno_m) { $block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m}; } } # break after anything that is not if-like if ( $type_m eq ';' || ( $type_m eq '}' && $block_type_m && $block_type_m ne 'if' && $block_type_m ne 'unless' && $block_type_m ne 'elsif' && $block_type_m ne 'else' ) ) { $rvao_args->{break_alignment_before} = 1; } } else { ## ok - do not need to break vertical alignment here } # ---------------------------------- # define 'rvertical_tightness_flags' # ---------------------------------- # These flags tell the vertical aligner if/when to combine consecutive # lines, based on the user input parameters. $rvao_args->{rvertical_tightness_flags} = $self->set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, $ending_in_quote, $closing_side_comment ) unless ( $is_block_comment || $self->[_no_vertical_tightness_flags_] ); # ---------------------------------- # define 'is_terminal_ternary' flag # ---------------------------------- # This flag is set at the final ':' of a ternary chain to request # vertical alignment of the final term. Here is a slightly complex # example: # # $self->{_text} = ( # !$section ? '' # : $type eq 'item' ? "the $section entry" # : "the section on $section" # ) # . ( # $page # ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage" # : ' elsewhere in this document' # ); # if ( $type_beg eq ':' || $n > 0 && $type_end_last eq ':' ) { my $is_terminal_ternary = 0; my $last_leading_type = $n > 0 ? $type_beg_last : ':'; my $terminal_type = $types_to_go[$i_terminal]; if ( $terminal_type ne ';' && $n_last_line > $n && $level_end == $lev ) { my $Kbeg_next = $K_to_go[$ibeg_next]; $level_end = $rLL->[$Kbeg_next]->[_LEVEL_]; $terminal_type = $rLL->[$Kbeg_next]->[_TYPE_]; } if ( $last_leading_type eq ':' && ( ( $terminal_type eq ';' && $level_end <= $lev ) || ( $terminal_type ne ':' && $level_end < $lev ) ) ) { # the terminal term must not contain any ternary terms, as in # my $ECHO = ( # $Is_MSWin32 ? ".\\echo$$" # : $Is_MacOS ? ":echo$$" # : ( $Is_NetWare ? "echo$$" : "./echo$$" ) # ); $is_terminal_ternary = 1; my $KP = $rLL->[$Kbeg]->[_KNEXT_SEQ_ITEM_]; while ( defined($KP) && $KP <= $Kend ) { my $type_KP = $rLL->[$KP]->[_TYPE_]; if ( $type_KP eq '?' || $type_KP eq ':' ) { $is_terminal_ternary = 0; last; } $KP = $rLL->[$KP]->[_KNEXT_SEQ_ITEM_]; } } $rvao_args->{is_terminal_ternary} = $is_terminal_ternary; } # ------------------------------------------------- # add any new closing side comment to the last line # ------------------------------------------------- if ( $closing_side_comment && $n == $n_last_line && @{$rfields} ) { $rfields->[-1] .= " $closing_side_comment"; # NOTE: Patch for csc. We can just use 1 for the length of the csc # because its length should not be a limiting factor from here on. $rfield_lengths->[-1] += 2; # repack $rline_alignment = [ $rtokens, $rfields, $rpatterns, $rfield_lengths ]; } # ------------------------ # define flag 'list_seqno' # ------------------------ # This flag indicates if this line is contained in a multi-line list if ( !$is_block_comment ) { my $parent_seqno = $parent_seqno_to_go[$ibeg]; $rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno}; } # The alignment tokens have been marked with nesting_depths, so we need # to pass nesting depths to the vertical aligner. They remain invariant # under all formatting operations. Previously, level values were sent # to the aligner. But they can be altered in welding and other # operations, and this can lead to alignment errors. my $nesting_depth_beg = $nesting_depth_to_go[$ibeg]; my $nesting_depth_end = $nesting_depth_to_go[$iend]; # A quirk in the definition of nesting depths is that the closing token # has the same depth as internal tokens. The vertical aligner is # programmed to expect them to have the lower depth, so we fix this. if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- } if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- } # Adjust nesting depths to keep -lp indentation for qw lists. This is # required because qw lists contained in brackets do not get nesting # depths, but the vertical aligner is watching nesting depth changes to # decide if a -lp block is intact. Without this patch, qw lists # enclosed in angle brackets will not get the correct -lp indentation. # Looking for line with isolated qw ... if ( $rOpts_line_up_parentheses && $type_beg eq 'q' && $ibeg == $iend ) { # ... which is part of a multiline qw my $Km = $self->K_previous_nonblank($Kbeg); my $Kp = $self->K_next_nonblank($Kbeg); if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q' || defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) { $nesting_depth_beg++; $nesting_depth_end++; } } # --------------------------------- # define flag 'forget_side_comment' # --------------------------------- # This flag tells the vertical aligner to reset the side comment # location if we are entering a new block from level 0. This is # intended to keep side comments from drifting too far to the right. if ( $block_type_to_go[$i_terminal] && $nesting_depth_end > $nesting_depth_beg ) { $rvao_args->{forget_side_comment} = !$self->[_radjusted_levels_]->[$Kbeg]; } # ----------------------------------- # Store the remaining non-flag values # ----------------------------------- $rvao_args->{Kend} = $Kend_code; $rvao_args->{ci_level} = $ci_levels_to_go[$ibeg]; $rvao_args->{indentation} = $indentation; $rvao_args->{level_end} = $nesting_depth_end; $rvao_args->{level} = $nesting_depth_beg; $rvao_args->{rline_alignment} = $rline_alignment; $rvao_args->{maximum_line_length} = $maximum_line_length_at_level[ $levels_to_go[$ibeg] ]; # -------------------------------------- # send this line to the vertical aligner # -------------------------------------- my $vao = $self->[_vertical_aligner_object_]; $vao->valign_input($rvao_args); $do_not_pad = 0; } ## end of loop to output each line # Set flag indicating if the last line ends in an opening # token and is very short, so that a blank line is not # needed if the subsequent line is a comment. # Examples of what we are looking for: # { # && ( # BEGIN { # default { # sub { $self->[_last_output_short_opening_token_] # line ends in opening token # /^[\{\(\[L]$/ = $is_opening_type{$type_end} # and either && ( # line has either single opening token $iend_next == $ibeg_next # or is a single token followed by opening token. # Note that sub identifiers have blanks like 'sub doit' # $token_beg !~ /\s+/ || ( $iend_next - $ibeg_next <= 2 && index( $token_beg, SPACE ) < 0 ) ) # and limit total to 10 character widths && token_sequence_length( $ibeg_next, $iend_next ) <= 10; # remember indentation of lines containing opening containers for # later use by sub get_final_indentation $self->save_opening_indentation( $ri_first, $ri_last, $rindentation_list, $this_batch->[_runmatched_opening_indexes_] ) if ( $this_batch->[_runmatched_opening_indexes_] || $types_to_go[$max_index_to_go] eq 'q' ); # output any new -cscw block comment if ($cscw_block_comment) { $self->flush_vertical_aligner(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->write_code_line( $cscw_block_comment . "\n" ); } return; } ## end sub convey_batch_to_vertical_aligner sub check_batch_summed_lengths { my ( $self, $msg ) = @_; $msg = EMPTY_STRING unless defined($msg); my $rLL = $self->[_rLL_]; # Verify that the summed lengths are correct. We want to be sure that # errors have not been introduced by programming changes. Summed lengths # are defined in sub store_token. Operations like padding and unmasking # semicolons can change token lengths, but those operations are expected to # update the summed lengths when they make changes. So the summed lengths # should always be correct. foreach my $i ( 0 .. $max_index_to_go ) { my $len_by_sum = $summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i]; my $len_tok_i = $token_lengths_to_go[$i]; my $KK = $K_to_go[$i]; my $len_tok_K; # For --indent-only, there is not always agreement between # token lengths in _rLL_ and token_lengths_to_go, so skip that check. if ( defined($KK) && !$rOpts_indent_only ) { $len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_]; } if ( $len_by_sum != $len_tok_i || defined($len_tok_K) && $len_by_sum != $len_tok_K ) { my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef"; $KK = 'undef' unless defined($KK); my $tok = $tokens_to_go[$i]; my $type = $types_to_go[$i]; Fault(<<EOM); Summed lengths are appear to be incorrect. $msg lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok' EOM } } return; } ## end sub check_batch_summed_lengths { ## begin closure set_vertical_alignment_markers my %is_vertical_alignment_type; my %is_not_vertical_alignment_token; my %is_vertical_alignment_keyword; my %is_terminal_alignment_type; my %is_low_level_alignment_token; BEGIN { my @q; # Replaced =~ and // in the list. // had been removed in RT 119588 @q = qw# = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= { ? : => && || ~~ !~~ =~ !~ // <=> -> #; @is_vertical_alignment_type{@q} = (1) x scalar(@q); # These 'tokens' are not aligned. We need this to remove [ # from the above list because it has type ='{' @q = qw([); @is_not_vertical_alignment_token{@q} = (1) x scalar(@q); # these are the only types aligned at a line end @q = qw(&& || =>); @is_terminal_alignment_type{@q} = (1) x scalar(@q); # these tokens only align at line level @q = ( '{', '(' ); @is_low_level_alignment_token{@q} = (1) x scalar(@q); # eq and ne were removed from this list to improve alignment chances @q = qw(if unless and or err for foreach while until); @is_vertical_alignment_keyword{@q} = (1) x scalar(@q); } ## end BEGIN my $ralignment_type_to_go; my $ralignment_counts; my $ralignment_hash_by_line; sub set_vertical_alignment_markers { my ( $self, $ri_first, $ri_last ) = @_; #---------------------------------------------------------------------- # This routine looks at output lines for certain tokens which can serve # as vertical alignment markers (such as an '='). #---------------------------------------------------------------------- # Input parameters: # $ri_first = ref to list of starting line indexes in _to_go arrays # $ri_last = ref to list of ending line indexes in _to_go arrays # Method: We look at each token $i in this output batch and set # $ralignment_type_to_go->[$i] equal to those tokens at which we would # accept vertical alignment. # Initialize closure (and return) variables: $ralignment_type_to_go = []; $ralignment_counts = []; $ralignment_hash_by_line = []; # NOTE: closing side comments can insert up to 2 additional tokens # beyond the original $max_index_to_go, so we need to check ri_last for # the last index. my $max_line = @{$ri_first} - 1; my $max_i = $ri_last->[$max_line]; if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go } # ----------------------------------------------------------------- # Shortcut: # - no alignments if there is only 1 token. # - and nothing to do if we aren't allowed to change whitespace. # ----------------------------------------------------------------- if ( $max_i <= 0 || !$rOpts_add_whitespace ) { goto RETURN; } # ------------------------------- # First handle any side comment. # ------------------------------- my $i_terminal = $max_i; if ( $types_to_go[$max_i] eq '#' ) { # We know $max_i > 0 if we get here. $i_terminal -= 1; if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) { $i_terminal -= 1; } my $token = $tokens_to_go[$max_i]; my $KK = $K_to_go[$max_i]; # Do not align various special side comments my $do_not_align = ( # it is any specially marked side comment ( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} ) # or it is a static side comment || ( $rOpts->{'static-side-comments'} && $token =~ /$static_side_comment_pattern/ ) # or a closing side comment || ( $types_to_go[$i_terminal] eq '}' && $tokens_to_go[$i_terminal] eq '}' && $token =~ /$closing_side_comment_prefix_pattern/ ) ); # - For the specific combination -vc -nvsc, we put all side comments # at fixed locations. Note that we will lose hanging side comment # alignments. Otherwise, hsc's can move to strange locations. # - For -nvc -nvsc we make all side comments vertical alignments # because the vertical aligner will check for -nvsc and be able # to reduce the final padding to the side comments for long lines. # and keep hanging side comments aligned. if ( !$do_not_align && !$rOpts_valign_side_comments && $rOpts_valign_code ) { $do_not_align = 1; my $ipad = $max_i - 1; if ( $types_to_go[$ipad] eq 'b' ) { my $pad_spaces = $rOpts->{'minimum-space-to-comment'} - $token_lengths_to_go[$ipad]; $self->pad_token( $ipad, $pad_spaces ); } } if ( !$do_not_align ) { $ralignment_type_to_go->[$max_i] = '#'; $ralignment_hash_by_line->[$max_line]->{$max_i} = '#'; $ralignment_counts->[$max_line]++; } } # ---------------------------------------------- # Nothing more to do on this line if -nvc is set # ---------------------------------------------- if ( !$rOpts_valign_code ) { goto RETURN; } # ------------------------------------- # Loop over each line of this batch ... # ------------------------------------- foreach my $line ( 0 .. $max_line ) { my $ibeg = $ri_first->[$line]; my $iend = $ri_last->[$line]; next if ( $iend <= $ibeg ); # back up before any side comment if ( $iend > $i_terminal ) { $iend = $i_terminal } #---------------------------------- # Loop over all tokens on this line #---------------------------------- $self->set_vertical_alignment_markers_token_loop( $line, $ibeg, $iend ); } RETURN: return ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ); } ## end sub set_vertical_alignment_markers sub set_vertical_alignment_markers_token_loop { my ( $self, $line, $ibeg, $iend ) = @_; # Set vertical alignment markers for the tokens on one line # of the current output batch. This is done by updating the # three closure variables: # $ralignment_type_to_go # $ralignment_counts # $ralignment_hash_by_line # Input parameters: # $line = index of this line in the current batch # $ibeg, $iend = index range of tokens to check in the _to_go arrays my $level_beg = $levels_to_go[$ibeg]; my $token_beg = $tokens_to_go[$ibeg]; my $type_beg = $types_to_go[$ibeg]; my $type_beg_special_char = ( $type_beg eq '.' || $type_beg eq ':' || $type_beg eq '?' ); my $last_vertical_alignment_BEFORE_index = -1; my $vert_last_nonblank_type = $type_beg; my $vert_last_nonblank_token = $token_beg; # ---------------------------------------------------------------- # Initialization code merged from 'sub delete_needless_alignments' # ---------------------------------------------------------------- my $i_good_paren = -1; my $i_elsif_close = $ibeg - 1; my $i_elsif_open = $iend + 1; my @imatch_list; if ( $type_beg eq 'k' ) { # Initialization for paren patch: mark a location of a paren we # should keep, such as one following something like a leading # 'if', 'elsif', $i_good_paren = $ibeg + 1; if ( $types_to_go[$i_good_paren] eq 'b' ) { $i_good_paren++; } # Initialization for 'elsif' patch: remember the paren range of # an elsif, and do not make alignments within them because this # can cause loss of padding and overall brace alignment in the # vertical aligner. if ( $token_beg eq 'elsif' && $i_good_paren < $iend && $tokens_to_go[$i_good_paren] eq '(' ) { $i_elsif_open = $i_good_paren; $i_elsif_close = $mate_index_to_go[$i_good_paren]; if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 } } } ## end if ( $type_beg eq 'k' ) # -------------------------------------------- # Loop over each token in this output line ... # -------------------------------------------- foreach my $i ( $ibeg + 1 .. $iend ) { next if ( $types_to_go[$i] eq 'b' ); my $type = $types_to_go[$i]; my $token = $tokens_to_go[$i]; my $alignment_type = EMPTY_STRING; # ---------------------------------------------- # Check for 'paren patch' : Remove excess parens # ---------------------------------------------- # Excess alignment of parens can prevent other good alignments. # For example, note the parens in the first two rows of the # following snippet. They would normally get marked for # alignment and aligned as follows: # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; # my $img = new Gimp::Image( $w, $h, RGB ); # This causes unnecessary paren alignment and prevents the # third equals from aligning. If we remove the unwanted # alignments we get: # my $w = $columns * $cell_w + ( $columns + 1 ) * $border; # my $h = $rows * $cell_h + ( $rows + 1 ) * $border; # my $img = new Gimp::Image( $w, $h, RGB ); # A rule for doing this which works well is to remove alignment # of parens whose containers do not contain other aligning # tokens, with the exception that we always keep alignment of # the first opening paren on a line (for things like 'if' and # 'elsif' statements). if ( $token eq ')' && @imatch_list ) { # undo the corresponding opening paren if: # - it is at the top of the stack # - and not the first overall opening paren # - does not follow a leading keyword on this line my $imate = $mate_index_to_go[$i]; if ( !defined($imate) ) { $imate = -1 } if ( $imatch_list[-1] eq $imate && ( $ibeg > 1 || @imatch_list > 1 ) && $imate > $i_good_paren ) { if ( $ralignment_type_to_go->[$imate] ) { $ralignment_type_to_go->[$imate] = EMPTY_STRING; $ralignment_counts->[$line]--; delete $ralignment_hash_by_line->[$line]->{$imate}; } pop @imatch_list; } } # do not align tokens at lower level than start of line # except for side comments if ( $levels_to_go[$i] < $level_beg ) { next; } #-------------------------------------------------------- # First see if we want to align BEFORE this token #-------------------------------------------------------- # The first possible token that we can align before # is index 2 because: 1) it doesn't normally make sense to # align before the first token and 2) the second # token must be a blank if we are to align before # the third if ( $i < $ibeg + 2 ) { } # must follow a blank token elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { } # otherwise, do not align two in a row to create a # blank field elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { } # align before one of these keywords # (within a line, since $i>1) elsif ( $type eq 'k' ) { # /^(if|unless|and|or|eq|ne)$/ if ( $is_vertical_alignment_keyword{$token} ) { $alignment_type = $token; # Align postfix 'unless' and 'if' if requested (git #116) # These are the only equivalent keywords. For equivalent # token types see '%operator_map'. if ( $token eq 'unless' && $rOpts_valign_if_unless ) { $alignment_type = 'if'; } } } # align qw in a 'use' statement (issue git #93) elsif ( $type eq 'q' ) { if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) { $alignment_type = $type; } } # align before one of these types.. elsif ( $is_vertical_alignment_type{$type} && !$is_not_vertical_alignment_token{$token} ) { $alignment_type = $token; # Do not align a terminal token. Although it might # occasionally look ok to do this, this has been found to be # a good general rule. The main problems are: # (1) that the terminal token (such as an = or :) might get # moved far to the right where it is hard to see because # nothing follows it, and # (2) doing so may prevent other good alignments. # Current exceptions are && and || and => if ( $i == $iend ) { $alignment_type = EMPTY_STRING unless ( $is_terminal_alignment_type{$type} ); } # Do not align leading ': (' or '. ('. This would prevent # alignment in something like the following: # $extra_space .= # ( $input_line_number < 10 ) ? " " # : ( $input_line_number < 100 ) ? " " # : ""; # or # $code = # ( $case_matters ? $accessor : " lc($accessor) " ) # . ( $yesno ? " eq " : " ne " ) # Also, do not align a ( following a leading ? so we can # align something like this: # $converter{$_}->{ushortok} = # $PDL::IO::Pic::biggrays # ? ( m/GIF/ ? 0 : 1 ) # : ( m/GIF|RAST|IFF/ ? 0 : 1 ); if ( $type_beg_special_char && $i == $ibeg + 2 && $types_to_go[ $i - 1 ] eq 'b' ) { $alignment_type = EMPTY_STRING; } # Certain tokens only align at the same level as the # initial line level if ( $is_low_level_alignment_token{$token} && $levels_to_go[$i] != $level_beg ) { $alignment_type = EMPTY_STRING; } if ( $token eq '(' ) { # For a paren after keyword, only align if-like parens, # such as: # if ( $a ) { &a } # elsif ( $b ) { &b } # ^-------------------aligned parens if ( $vert_last_nonblank_type eq 'k' && !$is_if_unless_elsif{$vert_last_nonblank_token} ) { $alignment_type = EMPTY_STRING; } # Do not align a spaced-function-paren if requested. # Issue git #53, #73. if ( !$rOpts_function_paren_vertical_alignment ) { my $seqno = $type_sequence_to_go[$i]; $alignment_type = EMPTY_STRING if ( $self->[_ris_function_call_paren_]->{$seqno} ); } # make () align with qw in a 'use' statement (git #93) if ( $tokens_to_go[0] eq 'use' && $types_to_go[0] eq 'k' && defined( $mate_index_to_go[$i] ) && $mate_index_to_go[$i] == $i + 1 ) { $alignment_type = 'q'; ## Note on discussion git #101. We could make this ## a separate type '()' to separate it from qw's: ## $alignment_type = ## $rOpts_valign_empty_parens_with_qw ? 'q' : '()'; } } # be sure the alignment tokens are unique # This experiment didn't work well: reason not determined # if ($token ne $type) {$alignment_type .= $type} } else { ## not a special type } # NOTE: This is deactivated because it causes the previous # if/elsif alignment to fail #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i]) #{ $alignment_type = $type; } if ($alignment_type) { $last_vertical_alignment_BEFORE_index = $i; } #-------------------------------------------------------- # Next see if we want to align AFTER the previous nonblank #-------------------------------------------------------- # We want to line up ',' and interior ';' tokens, with the added # space AFTER these tokens. (Note: interior ';' is included # because it may occur in short blocks). else { if ( # previous token IS one of these: ( $vert_last_nonblank_type eq ',' || $vert_last_nonblank_type eq ';' ) # and it follows a blank && $types_to_go[ $i - 1 ] eq 'b' # and it's NOT one of these && !$is_closing_token{$type} # then go ahead and align ) { $alignment_type = $vert_last_nonblank_type; } } #----------------------- # Set the alignment type #----------------------- if ($alignment_type) { # but do not align the opening brace of an anonymous sub if ( $token eq '{' && $block_type_to_go[$i] && $matches_ASUB{ $block_type_to_go[$i] } ) { } # and do not make alignments within 'elsif' parens elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) { } # and ignore any tokens which have leading padded spaces # example: perl527/lop.t elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) { } else { $ralignment_type_to_go->[$i] = $alignment_type; $ralignment_hash_by_line->[$line]->{$i} = $alignment_type; $ralignment_counts->[$line]++; push @imatch_list, $i; } } $vert_last_nonblank_type = $type; $vert_last_nonblank_token = $token; } return; } ## end sub set_vertical_alignment_markers_token_loop } ## end closure set_vertical_alignment_markers sub make_vertical_alignments { my ( $self, $ri_first, $ri_last ) = @_; #---------------------------- # Shortcut for a single token #---------------------------- if ( $max_index_to_go == 0 ) { if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) { my $rtokens = []; my $rfields = [ $tokens_to_go[0] ]; my $rpatterns = [ $types_to_go[0] ]; my $rfield_lengths = [ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ]; return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ]; } # Strange line packing, not fatal but should not happen else { if (DEVEL_MODE) { my $max_line = @{$ri_first} - 1; my $ibeg = $ri_first->[0]; my $iend = $ri_last->[0]; my $tok_b = $tokens_to_go[$ibeg]; my $tok_e = $tokens_to_go[$iend]; my $type_b = $types_to_go[$ibeg]; my $type_e = $types_to_go[$iend]; Fault( "Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n" ); } } } #--------------------------------------------------------- # Step 1: Define the alignment tokens for the entire batch #--------------------------------------------------------- my ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ); # We only need to make this call if vertical alignment of code is # requested or if a line might have a side comment. if ( $rOpts_valign_code || $types_to_go[$max_index_to_go] eq '#' ) { ( $ralignment_type_to_go, $ralignment_counts, $ralignment_hash_by_line ) = $self->set_vertical_alignment_markers( $ri_first, $ri_last ); } #---------------------------------------------- # Step 2: Break each line into alignment fields #---------------------------------------------- my $rline_alignments = []; my $max_line = @{$ri_first} - 1; foreach my $line ( 0 .. $max_line ) { my $ibeg = $ri_first->[$line]; my $iend = $ri_last->[$line]; my $rtok_fld_pat_len = $self->make_alignment_patterns( $ibeg, $iend, $ralignment_type_to_go, $ralignment_counts->[$line], $ralignment_hash_by_line->[$line] ); push @{$rline_alignments}, $rtok_fld_pat_len; } return $rline_alignments; } ## end sub make_vertical_alignments sub get_seqno { # get opening and closing sequence numbers of a token for the vertical # aligner. Assign qw quotes a value to allow qw opening and closing tokens # to be treated somewhat like opening and closing tokens for stacking # tokens by the vertical aligner. my ( $self, $ii, $ending_in_quote ) = @_; my $rLL = $self->[_rLL_]; my $KK = $K_to_go[$ii]; my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_]; if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) { my $SEQ_QW = -1; my $token = $rLL->[$KK]->[_TOKEN_]; if ( $ii > 0 ) { $seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ ); } else { if ( !$ending_in_quote ) { $seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ ); } } } return ($seqno); } ## end sub get_seqno { my %undo_extended_ci; sub initialize_undo_ci { %undo_extended_ci = (); return; } sub undo_ci { # Undo continuation indentation in certain sequences my ( $self, $ri_first, $ri_last, $rix_seqno_controlling_ci ) = @_; my ( $line_1, $line_2, $lev_last ); my $max_line = @{$ri_first} - 1; my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_]; # Prepare a list of controlling indexes for each line if required. # This is used for efficient processing below. Note: this is # critical for speed. In the initial implementation I just looped # through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I # found that this routine was causing a huge run time in large lists. # On a very large list test case, this new coding dropped the run time # of this routine from 30 seconds to 169 milliseconds. my @i_controlling_ci; if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) { my @tmp = reverse @{$rix_seqno_controlling_ci}; my $ix_next = pop @tmp; foreach my $line ( 0 .. $max_line ) { my $iend = $ri_last->[$line]; while ( defined($ix_next) && $ix_next <= $iend ) { push @{ $i_controlling_ci[$line] }, $ix_next; $ix_next = pop @tmp; } } } # Loop over all lines of the batch ... # Workaround originally created for problem c007, in which the # combination -lp -xci could produce a "Program bug" message in unusual # circumstances. my $skip_SECTION_1; if ( $rOpts_line_up_parentheses && $rOpts_extended_continuation_indentation ) { # Only set this flag if -lp is actually used here foreach my $line ( 0 .. $max_line ) { my $ibeg = $ri_first->[$line]; if ( ref( $leading_spaces_to_go[$ibeg] ) ) { $skip_SECTION_1 = 1; last; } } } foreach my $line ( 0 .. $max_line ) { my $ibeg = $ri_first->[$line]; my $iend = $ri_last->[$line]; my $lev = $levels_to_go[$ibeg]; #----------------------------------- # SECTION 1: Undo needless common CI #----------------------------------- # We are looking at leading tokens and looking for a sequence all # at the same level and all at a higher level than enclosing lines. # For example, we can undo continuation indentation in sort/map/grep # chains # my $dat1 = pack( "n*", # map { $_, $lookup->{$_} } # sort { $a <=> $b } # grep { $lookup->{$_} ne $default } keys %$lookup ); # to become # my $dat1 = pack( "n*", # map { $_, $lookup->{$_} } # sort { $a <=> $b } # grep { $lookup->{$_} ne $default } keys %$lookup ); if ( $line > 0 && !$skip_SECTION_1 ) { # if we have started a chain.. if ($line_1) { # see if it continues.. if ( $lev == $lev_last ) { if ( $types_to_go[$ibeg] eq 'k' && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) { # chain continues... # check for chain ending at end of a statement my $is_semicolon_terminated = ( $line == $max_line && ( $types_to_go[$iend] eq ';' # with possible side comment || ( $types_to_go[$iend] eq '#' && $iend - $ibeg >= 2 && $types_to_go[ $iend - 2 ] eq ';' && $types_to_go[ $iend - 1 ] eq 'b' ) ) ); $line_2 = $line if ($is_semicolon_terminated); } else { # kill chain $line_1 = undef; } } elsif ( $lev < $lev_last ) { # chain ends with previous line $line_2 = $line - 1; } else { ## ( $lev > $lev_last ) # kill chain $line_1 = undef; } # undo the continuation indentation if a chain ends if ( defined($line_2) && defined($line_1) ) { my $continuation_line_count = $line_2 - $line_1 + 1; @ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ] = (0) x ($continuation_line_count) if ( $continuation_line_count >= 0 ); @leading_spaces_to_go[ @{$ri_first} [ $line_1 .. $line_2 ] ] = @reduced_spaces_to_go[ @{$ri_first} [ $line_1 .. $line_2 ] ]; $line_1 = undef; } } # not in a chain yet.. else { # look for start of a new sort/map/grep chain if ( $lev > $lev_last ) { if ( $types_to_go[$ibeg] eq 'k' && $is_sort_map_grep{ $tokens_to_go[$ibeg] } ) { $line_1 = $line; } } } } #------------------------------------- # SECTION 2: Undo ci at cuddled blocks #------------------------------------- # Note that sub get_final_indentation will be called later to # actually do this, but for now we will tentatively mark cuddled # lines with ci=0 so that the the -xci loop which follows will be # correct at cuddles. if ( $types_to_go[$ibeg] eq '}' && ( $nesting_depth_to_go[$iend] + 1 == $nesting_depth_to_go[$ibeg] ) ) { my $terminal_type = $types_to_go[$iend]; if ( $terminal_type eq '#' && $iend > $ibeg ) { $terminal_type = $types_to_go[ $iend - 1 ]; if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) { $terminal_type = $types_to_go[ $iend - 2 ]; } } # Patch for rt144979, part 2. Coordinated with part 1. # Skip cuddled braces. my $seqno_beg = $type_sequence_to_go[$ibeg]; my $is_cuddled_closing_brace = $seqno_beg && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) { $ci_levels_to_go[$ibeg] = 0; } } #-------------------------------------------------------- # SECTION 3: Undo ci set by sub extended_ci if not needed #-------------------------------------------------------- # Undo the ci of the leading token if its controlling token # went out on a previous line without ci if ( $ci_levels_to_go[$ibeg] ) { my $Kbeg = $K_to_go[$ibeg]; my $seqno = $rseqno_controlling_my_ci->{$Kbeg}; if ( $seqno && $undo_extended_ci{$seqno} ) { # but do not undo ci set by the -lp flag if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) { $ci_levels_to_go[$ibeg] = 0; $leading_spaces_to_go[$ibeg] = $reduced_spaces_to_go[$ibeg]; } } } # Flag any controlling opening tokens in lines without ci. This # will be used later in the above if statement to undo the ci which # they added. The array i_controlling_ci[$line] was prepared at # the top of this routine. if ( !$ci_levels_to_go[$ibeg] && defined( $i_controlling_ci[$line] ) ) { foreach my $i ( @{ $i_controlling_ci[$line] } ) { my $seqno = $type_sequence_to_go[$i]; $undo_extended_ci{$seqno} = 1; } } $lev_last = $lev; } return; } ## end sub undo_ci } { ## begin closure set_logical_padding my %is_math_op; BEGIN { my @q = qw( + - * / ); @is_math_op{@q} = (1) x scalar(@q); } sub set_logical_padding { # Look at a batch of lines and see if extra padding can improve the # alignment when there are certain leading operators. Here is an # example, in which some extra space is introduced before # '( $year' to make it line up with the subsequent lines: # # if ( ( $Year < 1601 ) # || ( $Year > 2899 ) # || ( $EndYear < 1601 ) # || ( $EndYear > 2899 ) ) # { # &Error_OutOfRange; # } # my ( $self, $ri_first, $ri_last, $starting_in_quote ) = @_; my $max_line = @{$ri_first} - 1; my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces, $tok_next, $type_next, $has_leading_op_next, $has_leading_op ); # Patch to produce padding in the first line of short code blocks. # This is part of an update to fix cases b562 .. b983. # This is needed to compensate for a change which was made in 'sub # starting_one_line_block' to prevent blinkers. Previously, that sub # would not look at the total block size and rely on sub # break_long_lines to break up long blocks. Consequently, the # first line of those batches would end in the opening block brace of a # sort/map/grep/eval block. When this was changed to immediately check # for blocks which were too long, the opening block brace would go out # in a single batch, and the block contents would go out as the next # batch. This caused the logic in this routine which decides if the # first line should be padded to be incorrect. To fix this, we set a # flag if the previous batch ended in an opening sort/map/grep/eval # block brace, and use it to adjust the logic to compensate. # For example, the following would have previously been a single batch # but now is two batches. We want to pad the line starting in '$dir': # my (@indices) = # batch n-1 (prev batch n) # sort { # batch n-1 (prev batch n) # $dir eq 'left' # batch n # ? $cells[$a] <=> $cells[$b] # batch n # : $cells[$b] <=> $cells[$a]; # batch n # } ( 0 .. $#cells ); # batch n my $rLL = $self->[_rLL_]; my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_]; my $is_short_block; if ( $K_to_go[0] > 0 ) { my $Kp = $K_to_go[0] - 1; if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) { $Kp -= 1; } if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) { $Kp -= 1; if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) { $Kp -= 1; } } my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_]; if ($seqno) { my $block_type = $rblock_type_of_seqno->{$seqno}; if ($block_type) { $is_short_block = $is_sort_map_grep_eval{$block_type}; $is_short_block ||= $want_one_line_block{$block_type}; } } } # looking at each line of this batch.. foreach my $line ( 0 .. $max_line - 1 ) { # see if the next line begins with a logical operator $ibeg = $ri_first->[$line]; $iend = $ri_last->[$line]; $ibeg_next = $ri_first->[ $line + 1 ]; $tok_next = $tokens_to_go[$ibeg_next]; $type_next = $types_to_go[$ibeg_next]; $has_leading_op_next = ( $tok_next =~ /^\w/ ) ? $is_chain_operator{$tok_next} # + - * / : ? && || : $is_chain_operator{$type_next}; # and, or next unless ($has_leading_op_next); # next line must not be at lesser depth next if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] ); # identify the token in this line to be padded on the left $ipad = undef; # handle lines at same depth... if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) { # if this is not first line of the batch ... if ( $line > 0 ) { # and we have leading operator.. next if $has_leading_op; # Introduce padding if.. # 1. the previous line is at lesser depth, or # 2. the previous line ends in an assignment # 3. the previous line ends in a 'return' # 4. the previous line ends in a comma # Example 1: previous line at lesser depth # if ( ( $Year < 1601 ) # <- we are here but # || ( $Year > 2899 ) # list has not yet # || ( $EndYear < 1601 ) # collapsed vertically # || ( $EndYear > 2899 ) ) # { # # Example 2: previous line ending in assignment: # $leapyear = # $year % 4 ? 0 # <- We are here # : $year % 100 ? 1 # : $year % 400 ? 0 # : 1; # # Example 3: previous line ending in comma: # push @expr, # /test/ ? undef # : eval($_) ? 1 # : eval($_) ? 1 # : 0; # be sure levels agree (never indent after an indented 'if') next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] ); # allow padding on first line after a comma but only if: # (1) this is line 2 and # (2) there are at more than three lines and # (3) lines 3 and 4 have the same leading operator # These rules try to prevent padding within a long # comma-separated list. my $ok_comma; if ( $types_to_go[$iendm] eq ',' && $line == 1 && $max_line > 2 ) { my $ibeg_next_next = $ri_first->[ $line + 2 ]; my $tok_next_next = $tokens_to_go[$ibeg_next_next]; $ok_comma = $tok_next_next eq $tok_next; } my $ok_pad = ( $is_assignment{ $types_to_go[$iendm] } || $ok_comma || ( $nesting_depth_to_go[$ibegm] < $nesting_depth_to_go[$ibeg] ) || ( $types_to_go[$iendm] eq 'k' && $tokens_to_go[$iendm] eq 'return' ) ); next if ( !$ok_pad ); # we will add padding before the first token $ipad = $ibeg; } # for first line of the batch.. else { # WARNING: Never indent if first line is starting in a # continued quote, which would change the quote. next if $starting_in_quote; # if this is text after closing '}' # then look for an interior token to pad if ( $types_to_go[$ibeg] eq '}' ) { } # otherwise, we might pad if it looks really good elsif ($is_short_block) { $ipad = $ibeg; } else { # we might pad token $ibeg, so be sure that it # is at the same depth as the next line. next if ( $nesting_depth_to_go[$ibeg] != $nesting_depth_to_go[$ibeg_next] ); # We can pad on line 1 of a statement if at least 3 # lines will be aligned. Otherwise, it # can look very confusing. # We have to be careful not to pad if there are too few # lines. The current rule is: # (1) in general we require at least 3 consecutive lines # with the same leading chain operator token, # (2) but an exception is that we only require two lines # with leading colons if there are no more lines. For example, # the first $i in the following snippet would get padding # by the second rule: # # $i == 1 ? ( "First", "Color" ) # : $i == 2 ? ( "Then", "Rarity" ) # : ( "Then", "Name" ); next if ( $max_line <= 1 ); my $leading_token = $tokens_to_go[$ibeg_next]; my $tokens_differ; # never indent line 1 of a '.' series because # previous line is most likely at same level. # TODO: we should also look at the leading_spaces # of the last output line and skip if it is same # as this line. next if ( $leading_token eq '.' ); my $count = 1; foreach my $l ( 2 .. 3 ) { last if ( $line + $l > $max_line ); $count++; my $ibeg_next_next = $ri_first->[ $line + $l ]; next if ( $tokens_to_go[$ibeg_next_next] eq $leading_token ); $tokens_differ = 1; last; } next if ($tokens_differ); next if ( $count < 3 && $leading_token ne ':' ); $ipad = $ibeg; } } } # find interior token to pad if necessary if ( !defined($ipad) ) { foreach my $i ( $ibeg .. $iend - 1 ) { # find any unclosed container next if ( !$type_sequence_to_go[$i] || !defined( $mate_index_to_go[$i] ) || $mate_index_to_go[$i] <= $iend ); # find next nonblank token to pad $ipad = $inext_to_go[$i]; last if $ipad; } last if ( !$ipad || $ipad > $iend ); } # We cannot pad the first leading token of a file because # it could cause a bug in which the starting indentation # level is guessed incorrectly each time the code is run # though perltidy, thus causing the code to march off to # the right. For example, the following snippet would have # this problem: ## ov_method mycan( $package, '(""' ), $package ## or ov_method mycan( $package, '(0+' ), $package ## or ov_method mycan( $package, '(bool' ), $package ## or ov_method mycan( $package, '(nomethod' ), $package; # If this snippet is within a block this won't happen # unless the user just processes the snippet alone within # an editor. In that case either the user will see and # fix the problem or it will be corrected next time the # entire file is processed with perltidy. my $this_batch = $self->[_this_batch_]; my $peak_batch_size = $this_batch->[_peak_batch_size_]; next if ( $ipad == 0 && $peak_batch_size <= 1 ); # next line must not be at greater depth my $iend_next = $ri_last->[ $line + 1 ]; next if ( $nesting_depth_to_go[ $iend_next + 1 ] > $nesting_depth_to_go[$ipad] ); # lines must be somewhat similar to be padded.. my $inext_next = $inext_to_go[$ibeg_next]; my $type = $types_to_go[$ipad]; # see if there are multiple continuation lines my $logical_continuation_lines = 1; if ( $line + 2 <= $max_line ) { my $leading_token = $tokens_to_go[$ibeg_next]; my $ibeg_next_next = $ri_first->[ $line + 2 ]; if ( $tokens_to_go[$ibeg_next_next] eq $leading_token && $nesting_depth_to_go[$ibeg_next] eq $nesting_depth_to_go[$ibeg_next_next] ) { $logical_continuation_lines++; } } # see if leading types match my $types_match = $types_to_go[$inext_next] eq $type; my $matches_without_bang; # if first line has leading ! then compare the following token if ( !$types_match && $type eq '!' ) { $types_match = $matches_without_bang = $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ]; } if ( # either we have multiple continuation lines to follow # and we are not padding the first token ( $logical_continuation_lines > 1 && ( $ipad > 0 || $is_short_block ) ) # or.. || ( # types must match $types_match # and keywords must match if keyword && !( $type eq 'k' && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next] ) ) ) { #----------------------begin special checks-------------- # # SPECIAL CHECK 1: # A check is needed before we can make the pad. # If we are in a list with some long items, we want each # item to stand out. So in the following example, the # first line beginning with '$casefold->' would look good # padded to align with the next line, but then it # would be indented more than the last line, so we # won't do it. # # ok( # $casefold->{code} eq '0041' # && $casefold->{status} eq 'C' # && $casefold->{mapping} eq '0061', # 'casefold 0x41' # ); # # Note: # It would be faster, and almost as good, to use a comma # count, and not pad if comma_count > 1 and the previous # line did not end with a comma. # my $ok_to_pad = 1; my $ibg = $ri_first->[ $line + 1 ]; my $depth = $nesting_depth_to_go[ $ibg + 1 ]; # just use simplified formula for leading spaces to avoid # needless sub calls my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg]; # look at each line beyond the next .. my $l = $line + 1; foreach my $ltest ( $line + 2 .. $max_line ) { $l = $ltest; my $ibeg_t = $ri_first->[$l]; # quit looking at the end of this container last if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth ) || ( $nesting_depth_to_go[$ibeg_t] < $depth ); # cannot do the pad if a later line would be # outdented more if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] < $lsp ) { $ok_to_pad = 0; last; } } # don't pad if we end in a broken list if ( $l == $max_line ) { my $i2 = $ri_last->[$l]; if ( $types_to_go[$i2] eq '#' ) { my $i1 = $ri_first->[$l]; next if terminal_type_i( $i1, $i2 ) eq ','; } } # SPECIAL CHECK 2: # a minus may introduce a quoted variable, and we will # add the pad only if this line begins with a bare word, # such as for the word 'Button' here: # [ # Button => "Print letter \"~$_\"", # -command => [ sub { print "$_[0]\n" }, $_ ], # -accelerator => "Meta+$_" # ]; # # On the other hand, if 'Button' is quoted, it looks best # not to pad: # [ # 'Button' => "Print letter \"~$_\"", # -command => [ sub { print "$_[0]\n" }, $_ ], # -accelerator => "Meta+$_" # ]; if ( $types_to_go[$ibeg_next] eq 'm' ) { $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q'; } next unless $ok_to_pad; #----------------------end special check--------------- my $length_1 = total_line_length( $ibeg, $ipad - 1 ); my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 ); $pad_spaces = $length_2 - $length_1; # If the first line has a leading ! and the second does # not, then remove one space to try to align the next # leading characters, which are often the same. For example: # if ( !$ts # || $ts == $self->Holder # || $self->Holder->Type eq "Arena" ) # # This usually helps readability, but if there are subsequent # ! operators things will still get messed up. For example: # # if ( !exists $Net::DNS::typesbyname{$qtype} # && exists $Net::DNS::classesbyname{$qtype} # && !exists $Net::DNS::classesbyname{$qclass} # && exists $Net::DNS::typesbyname{$qclass} ) # We can't fix that. if ($matches_without_bang) { $pad_spaces-- } # make sure this won't change if -lp is used my $indentation_1 = $leading_spaces_to_go[$ibeg]; if ( ref($indentation_1) && $indentation_1->get_recoverable_spaces() == 0 ) { my $indentation_2 = $leading_spaces_to_go[$ibeg_next]; if ( ref($indentation_2) && $indentation_2->get_recoverable_spaces() != 0 ) { $pad_spaces = 0; } } # we might be able to handle a pad of -1 by removing a blank # token if ( $pad_spaces < 0 ) { # Deactivated for -kpit due to conflict. This block deletes # a space in an attempt to improve alignment in some cases, # but it may conflict with user spacing requests. For now # it is just deactivated if the -kpit option is used. if ( $pad_spaces == -1 ) { if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' && !%keyword_paren_inner_tightness ) { $self->pad_token( $ipad - 1, $pad_spaces ); } } $pad_spaces = 0; } # now apply any padding for alignment if ( $ipad >= 0 && $pad_spaces ) { my $length_t = total_line_length( $ibeg, $iend ); if ( $pad_spaces + $length_t <= $maximum_line_length_at_level[ $levels_to_go[$ibeg] ] ) { $self->pad_token( $ipad, $pad_spaces ); } } } } continue { $iendm = $iend; $ibegm = $ibeg; $has_leading_op = $has_leading_op_next; } ## end of loop over lines return; } ## end sub set_logical_padding } ## end closure set_logical_padding sub pad_token { # insert $pad_spaces before token number $ipad my ( $self, $ipad, $pad_spaces ) = @_; my $rLL = $self->[_rLL_]; my $KK = $K_to_go[$ipad]; my $tok = $rLL->[$KK]->[_TOKEN_]; my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_]; if ( $pad_spaces > 0 ) { $tok = SPACE x $pad_spaces . $tok; $tok_len += $pad_spaces; } elsif ( $pad_spaces == 0 ) { return; } elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) { $tok = EMPTY_STRING; $tok_len = 0; } else { # shouldn't happen DEVEL_MODE && Fault("unexpected request for pad spaces = $pad_spaces\n"); return; } $tok = $rLL->[$KK]->[_TOKEN_] = $tok; $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len; $token_lengths_to_go[$ipad] += $pad_spaces; $tokens_to_go[$ipad] = $tok; foreach my $i ( $ipad .. $max_index_to_go ) { $summed_lengths_to_go[ $i + 1 ] += $pad_spaces; } return; } ## end sub pad_token sub xlp_tweak { # Remove one indentation space from unbroken containers marked with # 'K_extra_space'. These are mostly two-line lists with short names # formatted with -xlp -pt=2. # # Before this fix (extra space in line 2): # is($module->VERSION, $expected, # "$main_module->VERSION matches $module->VERSION ($expected)"); # # After this fix: # is($module->VERSION, $expected, # "$main_module->VERSION matches $module->VERSION ($expected)"); # # Notes: # - This fixes issue git #106 # - This must be called after 'set_logical_padding'. # - This is currently only applied to -xlp. It would also work for -lp # but that style is essentially frozen. my ( $self, $ri_first, $ri_last ) = @_; # Must be 2 or more lines return if ( @{$ri_first} <= 1 ); # Pull indentation object from start of second line my $ibeg_1 = $ri_first->[1]; my $lp_object = $leading_spaces_to_go[$ibeg_1]; return if ( !ref($lp_object) ); # This only applies to an indentation object with a marked token my $K_extra_space = $lp_object->get_K_extra_space(); return unless ($K_extra_space); # Look for the marked token within the first line of this batch my $ibeg_0 = $ri_first->[0]; my $iend_0 = $ri_last->[0]; my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0]; return if ( $ii <= $ibeg_0 || $ii > $iend_0 ); # Skip padded tokens, they have already been aligned my $tok = $tokens_to_go[$ii]; return if ( substr( $tok, 0, 1 ) eq SPACE ); # Skip 'if'-like statements, this does not improve them return if ( $types_to_go[$ibeg_0] eq 'k' && $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } ); # Looks okay, reduce indentation by 1 space if possible my $spaces = $lp_object->get_spaces(); if ( $spaces > 0 ) { $lp_object->decrease_SPACES(1); } return; } ## end sub xlp_tweak { ## begin closure make_alignment_patterns my %keyword_map; my %operator_map; my %is_w_n_C; my %is_my_local_our; my %is_kwU; my %is_use_like; my %is_binary_type; my %is_binary_keyword; my %name_map; BEGIN { # Note: %block_type_map is now global to enable the -gal=s option # Map certain keywords to the same 'if' class to align # long if/elsif sequences. [elsif.pl]. But note that this is # only for purposes of making the patterns, not alignment tokens. # The only possible equivalent alignment tokens are 'if' and 'unless', # and this is handled earlier under control of $rOpts_valign_if_unless # to avoid making this a global hash. %keyword_map = ( 'unless' => 'if', 'else' => 'if', 'elsif' => 'if', 'when' => 'given', 'default' => 'given', 'case' => 'switch', # treat an 'undef' similar to numbers and quotes 'undef' => 'Q', ); # Map certain operators to the same class for alignment. # Note that this map is for the alignment tokens, not the patterns. # We could have placed 'unless' => 'if' here, but since that is # under control of $rOpts_valign_if_unless, it is handled elsewhere. %operator_map = ( '!~' => '=~', '+=' => '+=', '-=' => '+=', '*=' => '+=', '/=' => '+=', ); %is_w_n_C = ( 'w' => 1, 'n' => 1, 'C' => 1, ); # leading keywords which to skip for efficiency when making parenless # container names my @q = qw( my local our return ); @{is_my_local_our}{@q} = (1) x scalar(@q); # leading keywords where we should just join one token to form # parenless name @q = qw( use ); @{is_use_like}{@q} = (1) x scalar(@q); # leading token types which may be used to make a container name @q = qw( k w U ); @{is_kwU}{@q} = (1) x scalar(@q); # token types which prevent using leading word as a container name @q = qw( x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <= %= ^= x= ~~ ** << /= &= // >> ~. &. |. ^. **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ ); push @q, ','; @{is_binary_type}{@q} = (1) x scalar(@q); # token keywords which prevent using leading word as a container name @q = qw(and or err eq ne cmp); @is_binary_keyword{@q} = (1) x scalar(@q); # Some common function calls whose args can be aligned. These do not # give good alignments if the lengths differ significantly. %name_map = ( 'unlike' => 'like', 'isnt' => 'is', ##'is_deeply' => 'is', # poor; names lengths too different ); } ## end BEGIN sub make_alignment_patterns { my ( $self, $ibeg, $iend, $ralignment_type_to_go, $alignment_count, $ralignment_hash ) = @_; #------------------------------------------------------------------ # This sub creates arrays of vertical alignment info for one output # line. #------------------------------------------------------------------ # Input parameters: # $ibeg, $iend - index range of this line in the _to_go arrays # $ralignment_type_to_go - alignment type of tokens, like '=', if any # $alignment_count - number of alignment tokens in the line # $ralignment_hash - this contains all of the alignments for this # line. It is not yet used but is available for future coding in # case there is a need to do a preliminary scan of alignment tokens. # The arrays which are created contain strings that can be tested by # the vertical aligner to see if consecutive lines can be aligned # vertically. # # The four arrays are indexed on the vertical # alignment fields and are: # @tokens - a list of any vertical alignment tokens for this line. # These are tokens, such as '=' '&&' '#' etc which # we want to might align vertically. These are # decorated with various information such as # nesting depth to prevent unwanted vertical # alignment matches. # @fields - the actual text of the line between the vertical alignment # tokens. # @patterns - a modified list of token types, one for each alignment # field. These should normally each match before alignment is # allowed, even when the alignment tokens match. # @field_lengths - the display width of each field if (DEVEL_MODE) { my $new_count = 0; if ( defined($ralignment_hash) ) { $new_count = keys %{$ralignment_hash}; } my $old_count = $alignment_count; $old_count = 0 unless ($old_count); if ( $new_count != $old_count ) { my $K = $K_to_go[$ibeg]; my $rLL = $self->[_rLL_]; my $lnl = $rLL->[$K]->[_LINE_INDEX_]; Fault( "alignment hash token count gives count=$new_count but old count is $old_count near line=$lnl\n" ); } } # ------------------------------------- # Shortcut for lines without alignments # ------------------------------------- if ( !$alignment_count ) { my $rtokens = []; my $rfield_lengths = [ $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg] ]; my $rpatterns; my $rfields; if ( $ibeg == $iend ) { $rfields = [ $tokens_to_go[$ibeg] ]; $rpatterns = [ $types_to_go[$ibeg] ]; } else { $rfields = [ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ]; $rpatterns = [ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ]; } return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ]; } my $i_start = $ibeg; my $depth = 0; my $i_depth_prev = $i_start; my $depth_prev = $depth; my %container_name = ( 0 => EMPTY_STRING ); my $saw_exclamation_mark = 0; my @tokens = (); my @fields = (); my @patterns = (); my @field_lengths = (); #------------------------------------------------------------- # Make a container name for any uncontained commas, issue c089 #------------------------------------------------------------- # This is a generalization of the fix for rt136416 which was a # specialized patch just for 'use Module' statements. # We restrict this to semicolon-terminated statements; that way # we know that the top level commas are not in a list container. if ( $ibeg == 0 && $iend == $max_index_to_go ) { my $iterm = $max_index_to_go; if ( $types_to_go[$iterm] eq '#' ) { $iterm = iprev_to_go($iterm); } # Alignment lines ending like '=> sub {'; fixes issue c093 my $term_type_ok = $types_to_go[$iterm] eq ';'; $term_type_ok ||= $tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm]; if ( $iterm > $ibeg && $term_type_ok && !$is_my_local_our{ $tokens_to_go[$ibeg] } && $levels_to_go[$ibeg] eq $levels_to_go[$iterm] ) { $container_name{'0'} = make_uncontained_comma_name( $iterm, $ibeg, $iend ); } } #-------------------------------- # Begin main loop over all tokens #-------------------------------- my $j = 0; # field index $patterns[0] = EMPTY_STRING; my %token_count; for my $i ( $ibeg .. $iend ) { #------------------------------------------------------------- # Part 1: keep track of containers balanced on this line only. #------------------------------------------------------------- # These are used below to prevent unwanted cross-line alignments. # Unbalanced containers already avoid aligning across # container boundaries. my $type = $types_to_go[$i]; if ( $type_sequence_to_go[$i] ) { my $token = $tokens_to_go[$i]; if ( $is_opening_token{$token} ) { # if container is balanced on this line... my $i_mate = $mate_index_to_go[$i]; if ( !defined($i_mate) ) { $i_mate = -1 } if ( $i_mate > $i && $i_mate <= $iend ) { $i_depth_prev = $i; $depth_prev = $depth; $depth++; # Append the previous token name to make the container name # more unique. This name will also be given to any commas # within this container, and it helps avoid undesirable # alignments of different types of containers. # Containers beginning with { and [ are given those names # for uniqueness. That way commas in different containers # will not match. Here is an example of what this prevents: # a => [ 1, 2, 3 ], # b => { b1 => 4, b2 => 5 }, # Here is another example of what we avoid by labeling the # commas properly: # is_d( [ $a, $a ], [ $b, $c ] ); # is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } ); # is_d( [ \$a, \$a ], [ \$b, \$c ] ); my $name = $token eq '(' ? $self->make_paren_name($i) : $token; # name cannot be '.', so change to something else if so if ( $name eq '.' ) { $name = 'dot' } $container_name{$depth} = "+" . $name; # Make the container name even more unique if necessary. # If we are not vertically aligning this opening paren, # append a character count to avoid bad alignment since # it usually looks bad to align commas within containers # for which the opening parens do not align. Here # is an example very BAD alignment of commas (because # the atan2 functions are not all aligned): # $XY = # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - # $X * atan2( $X, 1 ) - # $Y * atan2( $Y, 1 ); # # On the other hand, it is usually okay to align commas # if opening parens align, such as: # glVertex3d( $cx + $s * $xs, $cy, $z ); # glVertex3d( $cx, $cy + $s * $ys, $z ); # glVertex3d( $cx - $s * $xs, $cy, $z ); # glVertex3d( $cx, $cy - $s * $ys, $z ); # # To distinguish between these situations, we append # the length of the line from the previous matching # token, or beginning of line, to the function name. # This will allow the vertical aligner to reject # undesirable matches. # if we are not aligning on this paren... if ( !$ralignment_type_to_go->[$i] ) { # Add the length to the name ... my $len = $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start]; # Do not include the length of any '!'. Otherwise, # commas in the following line will not match: # ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) ); # ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) ); if ($saw_exclamation_mark) { $len -= 1 } # For first token, use distance from start of line # but subtract off the indentation due to level. # Otherwise, results could vary with indentation. if ( $i_start == $ibeg ) { $len += leading_spaces_to_go($ibeg) - $levels_to_go[$i_start] * $rOpts_indent_columns; } if ( $len < 0 ) { $len = 0 } # tack this length onto the container name to try # to make a unique token name $container_name{$depth} .= "-" . $len; } ## end if ( !$ralignment_type_to_go...) } ## end if ( $i_mate > $i && $i_mate...) } ## end if ( $is_opening_token...) elsif ( $is_closing_token{$token} ) { $i_depth_prev = $i; $depth_prev = $depth; $depth-- if $depth > 0; } else { ## must be ternary } } ## end if ( $type_sequence_to_go...) #------------------------------------------------------------ # Part 2: if we find a new synchronization token, we are done # with a field #------------------------------------------------------------ if ( $i > $i_start && $ralignment_type_to_go->[$i] ) { my $tok = my $raw_tok = $ralignment_type_to_go->[$i]; # map similar items my $tok_map = $operator_map{$tok}; $tok = $tok_map if ($tok_map); # make separators in different nesting depths unique # by appending the nesting depth digit. if ( $raw_tok ne '#' ) { $tok .= "$nesting_depth_to_go[$i]"; } # also decorate commas with any container name to avoid # unwanted cross-line alignments. if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { # If we are at an opening token which increased depth, we have # to use the name from the previous depth. my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth; my $depth_p = ( $depth_last < $depth ? $depth_last : $depth ); if ( $container_name{$depth_p} ) { $tok .= $container_name{$depth_p}; } } # Patch to avoid aligning leading and trailing if, unless. # Mark trailing if, unless statements with container names. # This makes them different from leading if, unless which # are not so marked at present. If we ever need to name # them too, we could use ci to distinguish them. # Example problem to avoid: # return ( 2, "DBERROR" ) # if ( $retval == 2 ); # if ( scalar @_ ) { # my ( $a, $b, $c, $d, $e, $f ) = @_; # } if ( $raw_tok eq '(' ) { if ( $ci_levels_to_go[$ibeg] && $container_name{$depth} =~ /^\+(if|unless)/ ) { $tok .= $container_name{$depth}; } } # Decorate block braces with block types to avoid # unwanted alignments such as the following: # foreach ( @{$routput_array} ) { $fh->print($_) } # eval { $fh->close() }; if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { my $block_type = $block_type_to_go[$i]; # map certain related block types to allow # else blocks to align $block_type = $block_type_map{$block_type} if ( defined( $block_type_map{$block_type} ) ); # remove sub names to allow one-line sub braces to align # regardless of name if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' } # allow all control-type blocks to align if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } $tok .= $block_type; # Avoid aligning opening braces across leading ci level # changes by marking block type with _ci (issue c224) if ( $ci_levels_to_go[$ibeg] ) { $tok .= '_1' } } # Mark multiple copies of certain tokens with the copy number # This will allow the aligner to decide if they are matched. # For now, only do this for equals. For example, the two # equals on the next line will be labeled '=0' and '=0.2'. # Later, the '=0.2' will be ignored in alignment because it # has no match. # $| = $debug = 1 if $opt_d; # $full_index = 1 if $opt_i; if ( $raw_tok eq '=' || $raw_tok eq '=>' ) { $token_count{$tok}++; if ( $token_count{$tok} > 1 ) { $tok .= '.' . $token_count{$tok}; } } # concatenate the text of the consecutive tokens to form # the field push( @fields, join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) ); push @field_lengths, $summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start]; # store the alignment token for this field push( @tokens, $tok ); # get ready for the next batch $i_start = $i; $saw_exclamation_mark = 0; $j++; $patterns[$j] = EMPTY_STRING; } ## end if ( new synchronization token #----------------------------------------------- # Part 3: continue accumulating the next pattern #----------------------------------------------- # for keywords we have to use the actual text if ( $type eq 'k' ) { my $tok_fix = $tokens_to_go[$i]; # but map certain keywords to a common string to allow # alignment. $tok_fix = $keyword_map{$tok_fix} if ( defined( $keyword_map{$tok_fix} ) ); $patterns[$j] .= $tok_fix; } elsif ( $type eq 'b' ) { $patterns[$j] .= $type; } # Mark most things before arrows as a quote to # get them to line up. Testfile: mixed.pl. # handle $type =~ /^[wnC]$/ elsif ( $is_w_n_C{$type} ) { my $type_fix = $type; if ( $i < $iend - 1 ) { my $next_type = $types_to_go[ $i + 1 ]; my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); if ( $types_to_go[$i_next_nonblank] eq '=>' ) { $type_fix = 'Q'; # Patch to ignore leading minus before words, # by changing pattern 'mQ' into just 'Q', # so that we can align things like this: # Button => "Print letter \"~$_\"", # -command => [ sub { print "$_[0]\n" }, $_ ], if ( $patterns[$j] eq 'm' ) { $patterns[$j] = EMPTY_STRING; } } } # Convert a bareword within braces into a quote for # matching. This will allow alignment of expressions like # this: # local ( $SIG{'INT'} ) = IGNORE; # local ( $SIG{ALRM} ) = 'POSTMAN'; if ( $type eq 'w' && $i > $ibeg && $i < $iend && $types_to_go[ $i - 1 ] eq 'L' && $types_to_go[ $i + 1 ] eq 'R' ) { $type_fix = 'Q'; } # patch to make numbers and quotes align if ( $type eq 'n' ) { $type_fix = 'Q' } $patterns[$j] .= $type_fix; } ## end elsif ( $is_w_n_C{$type} ) # ignore any ! in patterns elsif ( $type eq '!' ) { $saw_exclamation_mark = 1; } # everything else else { $patterns[$j] .= $type; # remove any zero-level name at first fat comma if ( $depth == 0 && $type eq '=>' ) { $container_name{$depth} = EMPTY_STRING; } } } ## end for my $i ( $ibeg .. $iend) #--------------------------------------------------------------- # End of main loop .. join text of tokens to make the last field #--------------------------------------------------------------- push( @fields, join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) ); push @field_lengths, $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start]; return [ \@tokens, \@fields, \@patterns, \@field_lengths ]; } ## end sub make_alignment_patterns sub make_uncontained_comma_name { my ( $iterm, $ibeg, $iend ) = @_; # Make a container name by combining all leading barewords, # keywords and functions. my $name = EMPTY_STRING; my $count = 0; my $count_max; my $iname_end; my $ilast_blank; for ( $ibeg .. $iterm ) { my $type = $types_to_go[$_]; if ( $type eq 'b' ) { $ilast_blank = $_; next; } my $token = $tokens_to_go[$_]; # Give up if we find an opening paren, binary operator or # comma within or after the proposed container name. if ( $token eq '(' || $is_binary_type{$type} || $type eq 'k' && $is_binary_keyword{$token} ) { $name = EMPTY_STRING; last; } # The container name is only built of certain types: last if ( !$is_kwU{$type} ); # Normally it is made of one word, but two words for 'use' if ( $count == 0 ) { if ( $type eq 'k' && $is_use_like{ $tokens_to_go[$_] } ) { $count_max = 2; } else { $count_max = 1; } } elsif ( defined($count_max) && $count >= $count_max ) { last; } else { ## continue } if ( defined( $name_map{$token} ) ) { $token = $name_map{$token}; } $name .= SPACE . $token; $iname_end = $_; $count++; } # Require a space after the container name token(s) if ( $name && defined($ilast_blank) && $ilast_blank > $iname_end ) { $name = substr( $name, 1 ); } return $name; } ## end sub make_uncontained_comma_name } ## end closure make_alignment_patterns sub make_paren_name { my ( $self, $i ) = @_; # The token at index $i is a '('. # Create an alignment name for it to avoid incorrect alignments. # Start with the name of the previous nonblank token... my $name = EMPTY_STRING; my $im = $i - 1; return EMPTY_STRING if ( $im < 0 ); if ( $types_to_go[$im] eq 'b' ) { $im--; } return EMPTY_STRING if ( $im < 0 ); $name = $tokens_to_go[$im]; # Prepend any sub name to an isolated -> to avoid unwanted alignments # [test case is test8/penco.pl] if ( $name eq '->' ) { $im--; if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { $name = $tokens_to_go[$im] . $name; } } # Finally, remove any leading arrows if ( substr( $name, 0, 2 ) eq '->' ) { $name = substr( $name, 2 ); } return $name; } ## end sub make_paren_name { ## begin closure get_final_indentation my ( $last_indentation_written, $last_unadjusted_indentation, $last_leading_token ); sub initialize_get_final_indentation { $last_indentation_written = 0; $last_unadjusted_indentation = 0; $last_leading_token = EMPTY_STRING; return; } ## end sub initialize_get_final_indentation sub get_final_indentation { my ( $self, # $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, $rindentation_list, $level_jump, $starting_in_quote, $is_static_block_comment, ) = @_; #-------------------------------------------------------------- # This routine makes any necessary adjustments to get the final # indentation of a line in the Formatter. #-------------------------------------------------------------- # It starts with the basic indentation which has been defined for the # leading token, and then takes into account any options that the user # has set regarding special indenting and outdenting. # This routine has to resolve a number of complex interacting issues, # including: # 1. The various -cti=n type flags, which contain the desired change in # indentation for lines ending in commas and semicolons, should be # followed, # 2. qw quotes require special processing and do not fit perfectly # with normal containers, # 3. formatting with -wn can complicate things, especially with qw # quotes, # 4. formatting with the -lp option is complicated, and does not # work well with qw quotes and with -wn formatting. # 5. a number of special situations, such as 'cuddled' formatting. # 6. This routine is mainly concerned with outdenting closing tokens # but note that there is some overlap with the functions of sub # undo_ci, which was processed earlier, so care has to be taken to # keep them coordinated. # Find the last code token of this line my $i_terminal = $iend; my $terminal_type = $types_to_go[$iend]; if ( $terminal_type eq '#' && $i_terminal > $ibeg ) { $i_terminal -= 1; $terminal_type = $types_to_go[$i_terminal]; if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) { $i_terminal -= 1; $terminal_type = $types_to_go[$i_terminal]; } } my $is_outdented_line; my $type_beg = $types_to_go[$ibeg]; my $token_beg = $tokens_to_go[$ibeg]; my $level_beg = $levels_to_go[$ibeg]; my $block_type_beg = $block_type_to_go[$ibeg]; my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; my $seqno_beg = $type_sequence_to_go[$ibeg]; my $is_closing_type_beg = $is_closing_type{$type_beg}; # QW INDENTATION PATCH 3: my $seqno_qw_closing; if ( $type_beg eq 'q' && $ibeg == 0 ) { my $KK = $K_to_go[$ibeg]; $seqno_qw_closing = $self->[_rending_multiline_qw_seqno_by_K_]->{$KK}; } my $is_semicolon_terminated = $terminal_type eq ';' && ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg] || $seqno_qw_closing ); # NOTE: A future improvement would be to make it semicolon terminated # even if it does not have a semicolon but is followed by a closing # block brace. This would undo ci even for something like the # following, in which the final paren does not have a semicolon because # it is a possible weld location: # if ($BOLD_MATH) { # ( # $labels, $comment, # join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' ) # ) # } # # MOJO patch: Set a flag if this lines begins with ')->' my $leading_paren_arrow = ( $is_closing_type_beg && $token_beg eq ')' && ( ( $ibeg < $i_terminal && $types_to_go[ $ibeg + 1 ] eq '->' ) || ( $ibeg < $i_terminal - 1 && $types_to_go[ $ibeg + 1 ] eq 'b' && $types_to_go[ $ibeg + 2 ] eq '->' ) ) ); #--------------------------------------------------------- # Section 1: set a flag and a default indentation # # Most lines are indented according to the initial token. # But it is common to outdent to the level just after the # terminal token in certain cases... # adjust_indentation flag: # 0 - do not adjust # 1 - outdent # 2 - vertically align with opening token # 3 - indent #--------------------------------------------------------- my $adjust_indentation = 0; my $default_adjust_indentation = 0; # Parameters needed for option 2, aligning with opening token: my ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ); #------------------------------------- # Section 1A: # if line starts with a sequenced item #------------------------------------- if ( $seqno_beg || $seqno_qw_closing ) { # This can be tedious so we let a sub do it ( $adjust_indentation, $default_adjust_indentation, $opening_indentation, $opening_offset, $is_leading, $opening_exists, ) = $self->get_closing_token_indentation( $ibeg, $iend, $ri_first, $ri_last, $rindentation_list, $level_jump, $i_terminal, $is_semicolon_terminated, $seqno_qw_closing, ); } #-------------------------------------------------------- # Section 1B: # if at ');', '};', '>;', and '];' of a terminal qw quote #-------------------------------------------------------- elsif ( substr( $rpatterns->[0], 0, 2 ) eq 'qb' && substr( $rfields->[0], -1, 1 ) eq ';' ## $rpatterns->[0] =~ /^qb*;$/ && $rfields->[0] =~ /^([\)\}\]\>]);$/ ) { if ( $closing_token_indentation{$1} == 0 ) { $adjust_indentation = 1; } else { $adjust_indentation = 3; } } else { ## ok } #--------------------------------------------------------- # Section 2: set indentation according to flag set above # # Select the indentation object to define leading # whitespace. If we are outdenting something like '} } );' # then we want to use one level below the last token # ($i_terminal) in order to get it to fully outdent through # all levels. #--------------------------------------------------------- my $indentation; my $lev; my $level_end = $levels_to_go[$iend]; #------------------------------------ # Section 2A: adjust_indentation == 0 # No change in indentation #------------------------------------ if ( $adjust_indentation == 0 ) { $indentation = $leading_spaces_beg; $lev = $level_beg; } #------------------------------------------------------------------- # Section 2B: adjust_indentation == 1 # Change the indentation to be that of a different token on the line #------------------------------------------------------------------- elsif ( $adjust_indentation == 1 ) { # Previously, the indentation of the terminal token was used: # OLD CODING: # $indentation = $reduced_spaces_to_go[$i_terminal]; # $lev = $levels_to_go[$i_terminal]; # Generalization for MOJO patch: # Use the lowest level indentation of the tokens on the line. # For example, here we can use the indentation of the ending ';': # } until ($selection > 0 and $selection < 10); # ok to use ';' # But this will not outdent if we use the terminal indentation: # )->then( sub { # use indentation of the ->, not the { # Warning: reduced_spaces_to_go[] may be a reference, do not # do numerical checks with it my $i_ind = $ibeg; $indentation = $reduced_spaces_to_go[$i_ind]; $lev = $levels_to_go[$i_ind]; while ( $i_ind < $i_terminal ) { $i_ind++; if ( $levels_to_go[$i_ind] < $lev ) { $indentation = $reduced_spaces_to_go[$i_ind]; $lev = $levels_to_go[$i_ind]; } } } #-------------------------------------------------------------- # Section 2C: adjust_indentation == 2 # Handle indented closing token which aligns with opening token #-------------------------------------------------------------- elsif ( $adjust_indentation == 2 ) { # handle option to align closing token with opening token $lev = $level_beg; # calculate spaces needed to align with opening token my $space_count = get_spaces($opening_indentation) + $opening_offset; # Indent less than the previous line. # # Problem: For -lp we don't exactly know what it was if there # were recoverable spaces sent to the aligner. A good solution # would be to force a flush of the vertical alignment buffer, so # that we would know. For now, this rule is used for -lp: # # When the last line did not start with a closing token we will # be optimistic that the aligner will recover everything wanted. # # This rule will prevent us from breaking a hierarchy of closing # tokens, and in a worst case will leave a closing paren too far # indented, but this is better than frequently leaving it not # indented enough. my $last_spaces = get_spaces($last_indentation_written); if ( ref($last_indentation_written) && !$is_closing_token{$last_leading_token} ) { $last_spaces += get_recoverable_spaces($last_indentation_written); } # reset the indentation to the new space count if it works # only options are all or none: nothing in-between looks good $lev = $level_beg; my $diff = $last_spaces - $space_count; if ( $diff > 0 ) { $indentation = $space_count; } else { # We need to fix things ... but there is no good way to do it. # The best solution is for the user to use a longer maximum # line length. We could get a smooth variation if we just move # the paren in using # $space_count -= ( 1 - $diff ); # But unfortunately this can give a rather unbalanced look. # For -xlp we currently allow a tolerance of one indentation # level and then revert to a simpler default. This will jump # suddenly but keeps a balanced look. if ( $rOpts_extended_line_up_parentheses && $diff >= -$rOpts_indent_columns && $space_count > $leading_spaces_beg ) { $indentation = $space_count; } # Otherwise revert to defaults elsif ( $default_adjust_indentation == 0 ) { $indentation = $leading_spaces_beg; } elsif ( $default_adjust_indentation == 1 ) { $indentation = $reduced_spaces_to_go[$i_terminal]; $lev = $levels_to_go[$i_terminal]; } else { ## ok - maybe default_adjust_indentation > 1 ? } } } #------------------------------------------------------------- # Section 2D: adjust_indentation == 3 # Full indentation of closing tokens (-icb and -icp or -cti=2) #------------------------------------------------------------- else { # handle -icb (indented closing code block braces) # Updated method for indented block braces: indent one full level if # there is no continuation indentation. This will occur for major # structures such as sub, if, else, but not for things like map # blocks. # # Note: only code blocks without continuation indentation are # handled here (if, else, unless, ..). In the following snippet, # the terminal brace of the sort block will have continuation # indentation as shown so it will not be handled by the coding # here. We would have to undo the continuation indentation to do # this, but it probably looks ok as is. This is a possible future # update for semicolon terminated lines. # # if ($sortby eq 'date' or $sortby eq 'size') { # @files = sort { # $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby} # or $a cmp $b # } @files; # } # if ( $block_type_beg && $ci_levels_to_go[$i_terminal] == 0 ) { my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] ); $indentation = $spaces + $rOpts_indent_columns; # NOTE: for -lp we could create a new indentation object, but # there is probably no need to do it } # handle -icp and any -icb block braces which fall through above # test such as the 'sort' block mentioned above. else { # There are currently two ways to handle -icp... # One way is to use the indentation of the previous line: # $indentation = $last_indentation_written; # The other way is to use the indentation that the previous line # would have had if it hadn't been adjusted: $indentation = $last_unadjusted_indentation; # Current method: use the minimum of the two. This avoids # inconsistent indentation. if ( get_spaces($last_indentation_written) < get_spaces($indentation) ) { $indentation = $last_indentation_written; } } # use previous indentation but use own level # to cause list to be flushed properly $lev = $level_beg; } #------------------------------------------------------------- # Remember indentation except for multi-line quotes, which get # no indentation #------------------------------------------------------------- if ( !( $ibeg == 0 && $starting_in_quote ) ) { $last_indentation_written = $indentation; $last_unadjusted_indentation = $leading_spaces_beg; $last_leading_token = $token_beg; # Patch to make a line which is the end of a qw quote work with the # -lp option. Make $token_beg look like a closing token as some # type even if it is not. This variable will become # $last_leading_token at the end of this loop. Then, if the -lp # style is selected, and the next line is also a # closing token, it will not get more indentation than this line. # We need to do this because qw quotes (at present) only get # continuation indentation, not one level of indentation, so we # need to turn off the -lp indentation. # ... a picture is worth a thousand words: # perltidy -wn -gnu (Without this patch): # ok(defined( # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 # 2981014)]) # )); # perltidy -wn -gnu (With this patch): # ok(defined( # $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112 # 2981014)]) # )); if ( $seqno_qw_closing && ( length($token_beg) > 1 || $token_beg eq '>' ) ) { $last_leading_token = ')'; } } #--------------------------------------------------------------------- # Rule: lines with leading closing tokens should not be outdented more # than the line which contained the corresponding opening token. #--------------------------------------------------------------------- # Updated per bug report in alex_bug.pl: we must not # mess with the indentation of closing logical braces, so # we must treat something like '} else {' as if it were # an isolated brace my $is_isolated_block_brace = $block_type_beg && ( $i_terminal == $ibeg || $is_if_elsif_else_unless_while_until_for_foreach{$block_type_beg} ); # only do this for a ':; which is aligned with its leading '?' my $is_unaligned_colon = $type_beg eq ':' && !$is_leading; if ( defined($opening_indentation) && !$leading_paren_arrow # MOJO patch && !$is_isolated_block_brace && !$is_unaligned_colon ) { if ( get_spaces($opening_indentation) > get_spaces($indentation) ) { $indentation = $opening_indentation; } } #---------------------------------------------------- # remember the indentation of each line of this batch #---------------------------------------------------- push @{$rindentation_list}, $indentation; #--------------------------------------------- # outdent lines with certain leading tokens... #--------------------------------------------- if ( # must be first word of this batch $ibeg == 0 # and ... && ( # certain leading keywords if requested $rOpts_outdent_keywords && $type_beg eq 'k' && $outdent_keyword{$token_beg} # or labels if requested || $rOpts_outdent_labels && $type_beg eq 'J' # or static block comments if requested || $is_static_block_comment && $rOpts_outdent_static_block_comments ) ) { my $space_count = leading_spaces_to_go($ibeg); if ( $space_count > 0 ) { $space_count -= $rOpts_continuation_indentation; $is_outdented_line = 1; if ( $space_count < 0 ) { $space_count = 0 } # do not promote a spaced static block comment to non-spaced; # this is not normally necessary but could be for some # unusual user inputs (such as -ci = -i) if ( $type_beg eq '#' && $space_count == 0 ) { $space_count = 1; } $indentation = $space_count; } } return ( $indentation, $lev, $level_end, $i_terminal, $is_outdented_line, ); } ## end sub get_final_indentation sub get_closing_token_indentation { # Determine indentation adjustment for a line with a leading closing # token - i.e. one of these: ) ] } : my ( $self, # $ibeg, $iend, $ri_first, $ri_last, $rindentation_list, $level_jump, $i_terminal, $is_semicolon_terminated, $seqno_qw_closing, ) = @_; my $adjust_indentation = 0; my $default_adjust_indentation = $adjust_indentation; my $terminal_type = $types_to_go[$i_terminal]; my $type_beg = $types_to_go[$ibeg]; my $token_beg = $tokens_to_go[$ibeg]; my $level_beg = $levels_to_go[$ibeg]; my $block_type_beg = $block_type_to_go[$ibeg]; my $leading_spaces_beg = $leading_spaces_to_go[$ibeg]; my $seqno_beg = $type_sequence_to_go[$ibeg]; my $is_closing_type_beg = $is_closing_type{$type_beg}; my ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ); # Honor any flag to reduce -ci set by the -bbxi=n option if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) { # if this is an opening, it must be alone on the line ... if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) { $adjust_indentation = 1; } # ... or a single welded unit (fix for b1173) elsif ($total_weld_count) { my $K_beg = $K_to_go[$ibeg]; my $Kterm = $K_to_go[$i_terminal]; my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm}; if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) { $Kterm = $Kterm_test; } if ( $Kterm == $K_beg ) { $adjust_indentation = 1 } } else { ## ok } } my $ris_bli_container = $self->[_ris_bli_container_]; my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0; # Update the $is_bli flag as we go. It is initially 1. # We note seeing a leading opening brace by setting it to 2. # If we get to the closing brace without seeing the opening then we # turn it off. This occurs if the opening brace did not get output # at the start of a line, so we will then indent the closing brace # in the default way. if ( $is_bli_beg && $is_bli_beg == 1 ) { my $K_opening_container = $self->[_K_opening_container_]; my $K_opening = $K_opening_container->{$seqno_beg}; my $K_beg = $K_to_go[$ibeg]; if ( $K_beg eq $K_opening ) { $ris_bli_container->{$seqno_beg} = $is_bli_beg = 2; } else { $is_bli_beg = 0 } } # QW PATCH for the combination -lp -wn # For -lp formatting use $ibeg_weld_fix to get around the problem # that with -lp type formatting the opening and closing tokens to not # have sequence numbers. my $ibeg_weld_fix = $ibeg; if ( $seqno_qw_closing && $total_weld_count ) { my $i_plus = $inext_to_go[$ibeg]; if ( $i_plus <= $max_index_to_go ) { my $K_plus = $K_to_go[$i_plus]; if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) { $ibeg_weld_fix = $i_plus; } } } # if we are at a closing token of some type.. if ( $is_closing_type_beg || $seqno_qw_closing ) { my $K_beg = $K_to_go[$ibeg]; # get the indentation of the line containing the corresponding # opening token ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) = $self->get_opening_indentation( $ibeg_weld_fix, $ri_first, $ri_last, $rindentation_list, $seqno_qw_closing ); # Patch for rt144979, part 1. Coordinated with part 2. # Do not undo ci for a cuddled closing brace control; it # needs to be treated exactly the same ci as an isolated # closing brace. my $is_cuddled_closing_brace = $seqno_beg && $self->[_ris_cuddled_closing_brace_]->{$seqno_beg}; # First set the default behavior: if ( # default behavior is to outdent closing lines # of the form: "); }; ]; )->xxx;" $is_semicolon_terminated # and 'cuddled parens' of the form: ")->pack(". Bug fix for RT # #123749]: the TYPES here were incorrectly ')' and '('. The # corrected TYPES are '}' and '{'. But skip a cuddled block. || ( $terminal_type eq '{' && $type_beg eq '}' && ( $nesting_depth_to_go[$iend] + 1 == $nesting_depth_to_go[$ibeg] ) && !$is_cuddled_closing_brace ) # remove continuation indentation for any line like # } ... { # or without ending '{' and unbalanced, such as # such as '}->{$operator}' || ( $type_beg eq '}' && ( $types_to_go[$iend] eq '{' || $levels_to_go[$iend] < $level_beg ) # but not if a cuddled block && !$is_cuddled_closing_brace ) # and when the next line is at a lower indentation level... # PATCH #1: and only if the style allows undoing continuation # for all closing token types. We should really wait until # the indentation of the next line is known and then make # a decision, but that would require another pass. # PATCH #2: and not if this token is under -xci control || ( $level_jump < 0 && !$some_closing_token_indentation && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ) # Patch for -wn=2, multiple welded closing tokens || ( $i_terminal > $ibeg && $is_closing_type{ $types_to_go[$iend] } ) # Alternate Patch for git #51, isolated closing qw token not # outdented if no-delete-old-newlines is set. This works, but # a more general patch elsewhere fixes the real problem: ljump. # || ( $seqno_qw_closing && $ibeg == $i_terminal ) ) { $adjust_indentation = 1; } # outdent something like '),' if ( $terminal_type eq ',' # Removed this constraint for -wn # OLD: allow just one character before the comma # && $i_terminal == $ibeg + 1 # require LIST environment; otherwise, we may outdent too much - # this can happen in calls without parentheses (overload.t); && $self->is_in_list_by_i($i_terminal) ) { $adjust_indentation = 1; } # undo continuation indentation of a terminal closing token if # it is the last token before a level decrease. This will allow # a closing token to line up with its opening counterpart, and # avoids an indentation jump larger than 1 level. my $rLL = $self->[_rLL_]; my $Klimit = $self->[_Klimit_]; if ( $i_terminal == $ibeg && $is_closing_type_beg && defined($K_beg) && $K_beg < $Klimit ) { my $K_plus = $K_beg + 1; my $type_plus = $rLL->[$K_plus]->[_TYPE_]; if ( $type_plus eq 'b' && $K_plus < $Klimit ) { $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; } if ( $type_plus eq '#' && $K_plus < $Klimit ) { $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; if ( $type_plus eq 'b' && $K_plus < $Klimit ) { $type_plus = $rLL->[ ++$K_plus ]->[_TYPE_]; } # Note: we have skipped past just one comment (perhaps a # side comment). There could be more, and we could easily # skip past all the rest with the following code, or with a # while loop. It would be rare to have to do this, and # those block comments would still be indented, so it would # to leave them indented. So it seems best to just stop at # a maximum of one comment. ##if ($type_plus eq '#') { ## $K_plus = $self->K_next_code($K_plus); ##} } if ( !$is_bli_beg && defined($K_plus) ) { my $lev = $level_beg; my $level_next = $rLL->[$K_plus]->[_LEVEL_]; # and do not undo ci if it was set by the -xci option $adjust_indentation = 1 if ( $level_next < $lev && !$self->[_rseqno_controlling_my_ci_]->{$K_beg} ); } # Patch for RT #96101, in which closing brace of anonymous subs # was not outdented. We should look ahead and see if there is # a level decrease at the next token (i.e., a closing token), # but right now we do not have that information. For now # we see if we are in a list, and this works well. # See test files 'sub*.t' for good test cases. if ( !$rOpts_indent_closing_brace && $block_type_beg && $self->[_ris_asub_block_]->{$seqno_beg} && $self->is_in_list_by_i($i_terminal) ) { ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); my $indentation = $leading_spaces_beg; if ( defined($opening_indentation) && get_spaces($indentation) > get_spaces($opening_indentation) ) { $adjust_indentation = 1; } } } # YVES patch 1 of 2: # Undo ci of line with leading closing eval brace, # but not beyond the indentation of the line with # the opening brace. if ( $block_type_beg && $block_type_beg eq 'eval' && !ref($leading_spaces_beg) && !$rOpts_indent_closing_brace ) { ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); my $indentation = $leading_spaces_beg; if ( defined($opening_indentation) && get_spaces($indentation) > get_spaces($opening_indentation) ) { $adjust_indentation = 1; } } # patch for issue git #40: -bli setting has priority $adjust_indentation = 0 if ($is_bli_beg); $default_adjust_indentation = $adjust_indentation; # Now modify default behavior according to user request: # handle option to indent non-blocks of the form ); }; ]; # But don't do special indentation to something like ')->pack(' if ( !$block_type_beg ) { # Note that logical padding has already been applied, so we may # need to remove some spaces to get a valid hash key. my $tok = $token_beg; my $cti = $closing_token_indentation{$tok}; # Fix the value of 'cti' for an isolated non-welded closing qw # delimiter. if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) { # A quote delimiter which is not a container will not have # a cti value defined. In this case use the style of a # paren. For example # my @fars = ( # qw< # far # farfar # farfars-far # >, # ); if ( !defined($cti) && length($tok) == 1 ) { # something other than ')', '}', ']' ; use flag for ')' $cti = $closing_token_indentation{')'}; # But for now, do not outdent non-container qw # delimiters because it would would change existing # formatting. if ( $tok ne '>' ) { $cti = 3 } } # A non-welded closing qw cannot currently use -cti=1 # because that option requires a sequence number to find # the opening indentation, and qw quote delimiters are not # sequenced items. if ( defined($cti) && $cti == 1 ) { $cti = 0 } } if ( !defined($cti) ) { # $cti may not be defined for several reasons. # -padding may have been applied so the character # has a length > 1 # - we may have welded to a closing quote token. # Here is an example (perltidy -wn): # __PACKAGE__->load_components( qw( # > Core # > # > ) ); $adjust_indentation = 0; } elsif ( $cti == 1 ) { if ( $i_terminal <= $ibeg + 1 || $is_semicolon_terminated ) { $adjust_indentation = 2; } else { $adjust_indentation = 0; } } elsif ( $cti == 2 ) { if ($is_semicolon_terminated) { $adjust_indentation = 3; } else { $adjust_indentation = 0; } } elsif ( $cti == 3 ) { $adjust_indentation = 3; } else { ## cti == 0 } } # handle option to indent blocks else { if ( $rOpts_indent_closing_brace && ( $i_terminal == $ibeg # isolated terminal '}' || $is_semicolon_terminated ) ) # } xxxx ; { $adjust_indentation = 3; } } } ## end if ( $is_closing_type_beg || $seqno_qw_closing ) # if line begins with a ':', align it with any # previous line leading with corresponding ? elsif ( $type_beg eq ':' ) { ( $opening_indentation, $opening_offset, $is_leading, $opening_exists ) = $self->get_opening_indentation( $ibeg, $ri_first, $ri_last, $rindentation_list ); if ($is_leading) { $adjust_indentation = 2; } } else { # not a closing type } return ( $adjust_indentation, $default_adjust_indentation, $opening_indentation, $opening_offset, $is_leading, $opening_exists, ); } ## end sub get_closing_token_indentation } ## end closure get_final_indentation sub get_opening_indentation { # get the indentation of the line which output the opening token # corresponding to a given closing token in the current output batch. # # given: # $i_closing - index in this line of a closing token ')' '}' or ']' # # $ri_first - reference to list of the first index $i for each output # line in this batch # $ri_last - reference to list of the last index $i for each output line # in this batch # $rindentation_list - reference to a list containing the indentation # used for each line. # $qw_seqno - optional sequence number to use if normal seqno not defined # (NOTE: would be more general to just look this up from index i) # # return: # -the indentation of the line which contained the opening token # which matches the token at index $i_opening # -and its offset (number of columns) from the start of the line # my ( $self, $i_closing, $ri_first, $ri_last, $rindentation_list, $qw_seqno ) = @_; # first, see if the opening token is in the current batch my $i_opening = $mate_index_to_go[$i_closing]; my ( $indent, $offset, $is_leading, $exists ); $exists = 1; if ( defined($i_opening) && $i_opening >= 0 ) { # it is..look up the indentation ( $indent, $offset, $is_leading ) = lookup_opening_indentation( $i_opening, $ri_first, $ri_last, $rindentation_list ); } # if not, it should have been stored in the hash by a previous batch else { my $seqno = $type_sequence_to_go[$i_closing]; $seqno = $qw_seqno unless ($seqno); ( $indent, $offset, $is_leading, $exists ) = get_saved_opening_indentation($seqno); } return ( $indent, $offset, $is_leading, $exists ); } ## end sub get_opening_indentation sub examine_vertical_tightness_flags { my ($self) = @_; # For efficiency, we will set a flag to skip all calls to sub # 'set_vertical_tightness_flags' if vertical tightness is not possible with # the user input parameters. If vertical tightness is possible, we will # simply leave the flag undefined and return. # Vertical tightness is never possible with --freeze-whitespace if ($rOpts_freeze_whitespace) { $self->[_no_vertical_tightness_flags_] = 1; return; } # This sub is coordinated with sub set_vertical_tightness_flags. # The Section numbers in the following comments are the sections # in sub set_vertical_tightness_flags: # Examine controls for Section 1a: return if ($rOpts_line_up_parentheses); foreach my $key ( keys %opening_vertical_tightness ) { return if ( $opening_vertical_tightness{$key} ); } # Examine controls for Section 1b: foreach my $key ( keys %closing_vertical_tightness ) { return if ( $closing_vertical_tightness{$key} ); } # Examine controls for Section 1c: foreach my $key ( keys %opening_token_right ) { return if ( $opening_token_right{$key} ); } # Examine controls for Section 1d: foreach my $key ( keys %stack_opening_token ) { return if ( $stack_opening_token{$key} ); } foreach my $key ( keys %stack_closing_token ) { return if ( $stack_closing_token{$key} ); } # Examine controls for Section 2: return if ($rOpts_block_brace_vertical_tightness); # Examine controls for Section 3: return if ($rOpts_stack_closing_block_brace); # None of the controls used for vertical tightness are set, so # we can skip all calls to sub set_vertical_tightness_flags $self->[_no_vertical_tightness_flags_] = 1; return; } ## end sub examine_vertical_tightness_flags sub set_vertical_tightness_flags { my ( $self, $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last, $ending_in_quote, $closing_side_comment ) = @_; # Define vertical tightness controls for the nth line of a batch. # Note: do not call this sub for a block comment or if # $rOpts_freeze_whitespace is set. # These parameters are passed to the vertical aligner to indicated # if we should combine this line with the next line to achieve the # desired vertical tightness. This was previously an array but # has been converted to a hash: # old hash Meaning # index key # # 0 _vt_type: 1=opening non-block 2=closing non-block # 3=opening block brace 4=closing block brace # # 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok # 1b _vt_closing_flag: spaces of padding to use if closing # 2 _vt_seqno: sequence number of container # 3 _vt_valid flag: do not append if this flag is false. Will be # true if appropriate -vt flag is set. Otherwise, Will be # made true only for 2 line container in parens with -lp # 4 _vt_seqno_beg: sequence number of first token of line # 5 _vt_seqno_end: sequence number of last token of line # 6 _vt_min_lines: min number of lines for joining opening cache, # 0=no constraint # 7 _vt_max_lines: max number of lines for joining opening cache, # 0=no constraint # The vertical tightness mechanism can add whitespace, so whitespace can # continually increase if we allowed it when the -fws flag is set. # See case b499 for an example. # Define these values... my $vt_type = 0; my $vt_opening_flag = 0; my $vt_closing_flag = 0; my $vt_seqno = 0; my $vt_valid_flag = 0; my $vt_seqno_beg = 0; my $vt_seqno_end = 0; my $vt_min_lines = 0; my $vt_max_lines = 0; # Uses these global parameters: # $rOpts_block_brace_tightness # $rOpts_block_brace_vertical_tightness # $rOpts_stack_closing_block_brace # $rOpts_line_up_parentheses # %opening_vertical_tightness # %closing_vertical_tightness # %opening_token_right # %stack_closing_token # %stack_opening_token #-------------------------------------------------------------- # Vertical Tightness Flags Section 1: # Handle Lines 1 .. n-1 but not the last line # For non-BLOCK tokens, we will need to examine the next line # too, so we won't consider the last line. #-------------------------------------------------------------- if ( $n < $n_last_line ) { #-------------------------------------------------------------- # Vertical Tightness Flags Section 1a: # Look for Type 1, last token of this line is a non-block opening token #-------------------------------------------------------------- my $ibeg_next = $ri_first->[ $n + 1 ]; my $token_end = $tokens_to_go[$iend]; my $iend_next = $ri_last->[ $n + 1 ]; if ( $type_sequence_to_go[$iend] && !$block_type_to_go[$iend] && $is_opening_token{$token_end} && ( $opening_vertical_tightness{$token_end} > 0 # allow 2-line method call to be closed up || ( $rOpts_line_up_parentheses && $token_end eq '(' && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$iend] } && $iend > $ibeg && $types_to_go[ $iend - 1 ] ne 'b' ) ) ) { # avoid multiple jumps in nesting depth in one line if # requested my $ovt = $opening_vertical_tightness{$token_end}; # Turn off the -vt flag if the next line ends in a weld. # This avoids an instability with one-line welds (fixes b1183). my $type_end_next = $types_to_go[$iend_next]; $ovt = 0 if ( $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] } && $is_closing_type{$type_end_next} ); # The flag '_rbreak_container_' avoids conflict of -bom and -pt=1 # or -pt=2; fixes b1270. See similar patch above for $cvt. my $seqno = $type_sequence_to_go[$iend]; if ( $ovt && $seqno && $self->[_rbreak_container_]->{$seqno} ) { $ovt = 0; } # The flag '_rmax_vertical_tightness_' avoids welding conflicts. if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) { $ovt = min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} ); } if ( $ovt >= 2 || ( $nesting_depth_to_go[ $iend_next + 1 ] == $nesting_depth_to_go[$ibeg_next] ) ) { # If -vt flag has not been set, mark this as invalid # and aligner will validate it if it sees the closing paren # within 2 lines. my $valid_flag = $ovt; $vt_type = 1; $vt_opening_flag = $ovt; $vt_seqno = $type_sequence_to_go[$iend]; $vt_valid_flag = $valid_flag; } } #-------------------------------------------------------------- # Vertical Tightness Flags Section 1b: # Look for Type 2, first token of next line is a non-block closing # token .. and be sure this line does not have a side comment #-------------------------------------------------------------- my $token_next = $tokens_to_go[$ibeg_next]; if ( $type_sequence_to_go[$ibeg_next] && !$block_type_to_go[$ibeg_next] && $is_closing_token{$token_next} && $types_to_go[$iend] ne '#' ) # for safety, shouldn't happen! { my $cvt = $closing_vertical_tightness{$token_next}; # Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303 # See similar patch above for $ovt. my $seqno = $type_sequence_to_go[$ibeg_next]; if ( $cvt && $self->[_rbreak_container_]->{$seqno} ) { $cvt = 0; } # Implement cvt=3: like cvt=0 for assigned structures, like cvt=1 # otherwise. Added for rt136417. if ( $cvt == 3 ) { $cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1; } # The unusual combination -pvtc=2 -dws -naws can be unstable. # This fixes b1282, b1283. This can be moved to set_options. if ( $cvt == 2 && $rOpts_delete_old_whitespace && !$rOpts_add_whitespace ) { $cvt = 1; } # Fix for b1379, b1380, b1381, b1382, b1384 part 2, # instability with adding and deleting trailing commas: # Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380. # Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382. # Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384 if ( $cvt && $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} ) { $cvt = 0; } if ( # Never append a trailing line like ')->pack(' because it # will throw off later alignment. So this line must start at a # deeper level than the next line (fix1 for welding, git #45). ( $nesting_depth_to_go[$ibeg_next] >= $nesting_depth_to_go[ $iend_next + 1 ] + 1 ) && ( $cvt == 2 || ( !$self->is_in_list_by_i($ibeg_next) && ( $cvt == 1 # allow closing up 2-line method calls || ( $rOpts_line_up_parentheses && $token_next eq ')' && $type_sequence_to_go[$ibeg_next] && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$ibeg_next] } ) ) ) ) ) { # decide which trailing closing tokens to append.. my $ok = 0; if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 } else { my $str = join( EMPTY_STRING, @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] ); # append closing token if followed by comment or ';' # or another closing token (fix2 for welding, git #45) if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 } } if ($ok) { my $valid_flag = $cvt; my $min_lines = 0; my $max_lines = 0; # Fix for b1187 and b1188: Blinking can occur if we allow # welded tokens to re-form into one-line blocks during # vertical alignment when -lp used. So for this case we # set the minimum number of lines to be 1 instead of 0. # The maximum should be 1 if -vtc is not used. If -vtc is # used, we turn the valid # flag off and set the maximum to 0. This is equivalent to # using a large number. my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next]; if ( $rOpts_line_up_parentheses && $total_weld_count && $seqno_ibeg_next && $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next} && $self->is_welded_at_seqno($seqno_ibeg_next) ) { $min_lines = 1; $max_lines = $cvt ? 0 : 1; $valid_flag = 0; } $vt_type = 2; $vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1; $vt_seqno = $type_sequence_to_go[$ibeg_next]; $vt_valid_flag = $valid_flag; $vt_min_lines = $min_lines; $vt_max_lines = $max_lines; } } } #-------------------------------------------------------------- # Vertical Tightness Flags Section 1c: # Implement the Opening Token Right flag (Type 2).. # If requested, move an isolated trailing opening token to the end of # the previous line which ended in a comma. We could do this # in sub recombine_breakpoints but that would cause problems # with -lp formatting. The problem is that indentation will # quickly move far to the right in nested expressions. By # doing it after indentation has been set, we avoid changes # to the indentation. Actual movement of the token takes place # in sub valign_output_step_B. # Note added 4 May 2021: the man page suggests that the -otr flags # are mainly for opening tokens following commas. But this seems # to have been generalized long ago to include other situations. # I checked the coding back to 2012 and it is essentially the same # as here, so it is best to leave this unchanged for now. #-------------------------------------------------------------- if ( $opening_token_right{ $tokens_to_go[$ibeg_next] } # previous line is not opening # (use -sot to combine with it) && !$is_opening_token{$token_end} # previous line ended in one of these # (add other cases if necessary; '=>' and '.' are not necessary && !$block_type_to_go[$ibeg_next] # this is a line with just an opening token && ( $iend_next == $ibeg_next || $iend_next == $ibeg_next + 2 && $types_to_go[$iend_next] eq '#' ) # Fix for case b1060 when both -baoo and -otr are set: # to avoid blinking, honor the -baoo flag over the -otr flag. && $token_end ne '||' && $token_end ne '&&' # Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089. # Generalized from '=' to $is_assignment to fix b1375. && !( $is_assignment{ $types_to_go[$iend] } && $rOpts_line_up_parentheses && $type_sequence_to_go[$ibeg_next] && $self->[_rlp_object_by_seqno_] ->{ $type_sequence_to_go[$ibeg_next] } ) # looks bad if we align vertically with the wrong container && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next] # give -kba priority over -otr (b1445) && !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] } ) { my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; $vt_type = 2; $vt_closing_flag = $spaces; $vt_seqno = $type_sequence_to_go[$ibeg_next]; $vt_valid_flag = 1; } #-------------------------------------------------------------- # Vertical Tightness Flags Section 1d: # Stacking of opening and closing tokens (Type 2) #-------------------------------------------------------------- my $stackable; my $token_beg_next = $tokens_to_go[$ibeg_next]; # patch to make something like 'qw(' behave like an opening paren # (aran.t) if ( $types_to_go[$ibeg_next] eq 'q' ) { if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) { $token_beg_next = $1; } } if ( $is_closing_token{$token_end} && $is_closing_token{$token_beg_next} ) { # avoid instability of combo -bom and -sct; b1179 my $seq_next = $type_sequence_to_go[$ibeg_next]; $stackable = $stack_closing_token{$token_beg_next} unless ( $block_type_to_go[$ibeg_next] || $seq_next && $self->[_rbreak_container_]->{$seq_next} ); } elsif ($is_opening_token{$token_end} && $is_opening_token{$token_beg_next} ) { $stackable = $stack_opening_token{$token_beg_next} unless ( $block_type_to_go[$ibeg_next] ) ; # shouldn't happen; just checking } else { ## not stackable } if ($stackable) { my $is_semicolon_terminated; if ( $n + 1 == $n_last_line ) { my ( $terminal_type, $i_terminal ) = terminal_type_i( $ibeg_next, $iend_next ); $is_semicolon_terminated = $terminal_type eq ';' && $nesting_depth_to_go[$iend_next] < $nesting_depth_to_go[$ibeg_next]; } # this must be a line with just an opening token # or end in a semicolon if ( $is_semicolon_terminated || ( $iend_next == $ibeg_next || $iend_next == $ibeg_next + 2 && $types_to_go[$iend_next] eq '#' ) ) { my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0; $vt_type = 2; $vt_closing_flag = $spaces; $vt_seqno = $type_sequence_to_go[$ibeg_next]; $vt_valid_flag = 1; } } } #-------------------------------------------------------------- # Vertical Tightness Flags Section 2: # Handle type 3, opening block braces on last line of the batch # Check for a last line with isolated opening BLOCK curly #-------------------------------------------------------------- elsif ($rOpts_block_brace_vertical_tightness && $ibeg eq $iend && $types_to_go[$iend] eq '{' && $block_type_to_go[$iend] && $block_type_to_go[$iend] =~ /$block_brace_vertical_tightness_pattern/ ) { $vt_type = 3; $vt_opening_flag = $rOpts_block_brace_vertical_tightness; $vt_seqno = 0; $vt_valid_flag = 1; } #-------------------------------------------------------------- # Vertical Tightness Flags Section 3: # Handle type 4, a closing block brace on the last line of the batch Check # for a last line with isolated closing BLOCK curly # Patch: added a check for any new closing side comment which the # -csc option may generate. If it exists, there will be a side comment # so we cannot combine with a brace on the next line. This issue # occurs for the combination -scbb and -csc is used. #-------------------------------------------------------------- elsif ($rOpts_stack_closing_block_brace && $ibeg eq $iend && $block_type_to_go[$iend] && $types_to_go[$iend] eq '}' && ( !$closing_side_comment || $n < $n_last_line ) ) { my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1; $vt_type = 4; $vt_closing_flag = $spaces; $vt_seqno = $type_sequence_to_go[$iend]; $vt_valid_flag = 1; } else { ## none of the above } # get the sequence numbers of the ends of this line $vt_seqno_beg = $type_sequence_to_go[$ibeg]; if ( !$vt_seqno_beg ) { if ( $types_to_go[$ibeg] eq 'q' ) { $vt_seqno_beg = $self->get_seqno( $ibeg, $ending_in_quote ); } else { $vt_seqno_beg = EMPTY_STRING } } $vt_seqno_end = $type_sequence_to_go[$iend]; if ( !$vt_seqno_end ) { if ( $types_to_go[$iend] eq 'q' ) { $vt_seqno_end = $self->get_seqno( $iend, $ending_in_quote ); } else { $vt_seqno_end = EMPTY_STRING } } if ( !defined($vt_seqno) ) { $vt_seqno = EMPTY_STRING } my $rvertical_tightness_flags = { _vt_type => $vt_type, _vt_opening_flag => $vt_opening_flag, _vt_closing_flag => $vt_closing_flag, _vt_seqno => $vt_seqno, _vt_valid_flag => $vt_valid_flag, _vt_seqno_beg => $vt_seqno_beg, _vt_seqno_end => $vt_seqno_end, _vt_min_lines => $vt_min_lines, _vt_max_lines => $vt_max_lines, }; return ($rvertical_tightness_flags); } ## end sub set_vertical_tightness_flags ########################################################## # CODE SECTION 14: Code for creating closing side comments ########################################################## { ## begin closure accumulate_csc_text # These routines are called once per batch when the --closing-side-comments flag # has been set. my %block_leading_text; my %block_opening_line_number; my $csc_new_statement_ok; my $csc_last_label; my %csc_block_label; my $accumulating_text_for_block; my $leading_block_text; my $rleading_block_if_elsif_text; my $leading_block_text_level; my $leading_block_text_length_exceeded; my $leading_block_text_line_length; my $leading_block_text_line_number; sub initialize_csc_vars { %block_leading_text = (); %block_opening_line_number = (); $csc_new_statement_ok = 1; $csc_last_label = EMPTY_STRING; %csc_block_label = (); $rleading_block_if_elsif_text = []; $accumulating_text_for_block = EMPTY_STRING; reset_block_text_accumulator(); return; } ## end sub initialize_csc_vars sub reset_block_text_accumulator { # save text after 'if' and 'elsif' to append after 'else' if ($accumulating_text_for_block) { ## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { if ( $is_if_elsif{$accumulating_text_for_block} ) { push @{$rleading_block_if_elsif_text}, $leading_block_text; } } $accumulating_text_for_block = EMPTY_STRING; $leading_block_text = EMPTY_STRING; $leading_block_text_level = 0; $leading_block_text_length_exceeded = 0; $leading_block_text_line_number = 0; $leading_block_text_line_length = 0; return; } ## end sub reset_block_text_accumulator sub set_block_text_accumulator { my ( $self, $i ) = @_; $accumulating_text_for_block = $tokens_to_go[$i]; if ( $accumulating_text_for_block !~ /^els/ ) { $rleading_block_if_elsif_text = []; } $leading_block_text = EMPTY_STRING; $leading_block_text_level = $levels_to_go[$i]; $leading_block_text_line_number = $self->get_output_line_number(); $leading_block_text_length_exceeded = 0; # this will contain the column number of the last character # of the closing side comment $leading_block_text_line_length = length($csc_last_label) + length($accumulating_text_for_block) + length( $rOpts->{'closing-side-comment-prefix'} ) + $leading_block_text_level * $rOpts_indent_columns + 3; return; } ## end sub set_block_text_accumulator sub accumulate_block_text { my ( $self, $i ) = @_; # accumulate leading text for -csc, ignoring any side comments if ( $accumulating_text_for_block && !$leading_block_text_length_exceeded && $types_to_go[$i] ne '#' ) { my $added_length = $token_lengths_to_go[$i]; $added_length += 1 if $i == 0; my $new_line_length = $leading_block_text_line_length + $added_length; # we can add this text if we don't exceed some limits.. if ( # we must not have already exceeded the text length limit length($leading_block_text) < $rOpts_closing_side_comment_maximum_text # and either: # the new total line length must be below the line length limit # or the new length must be below the text length limit # (ie, we may allow one token to exceed the text length limit) && ( $new_line_length < $maximum_line_length_at_level[$leading_block_text_level] || length($leading_block_text) + $added_length < $rOpts_closing_side_comment_maximum_text ) # UNLESS: we are adding a closing paren before the brace we seek. # This is an attempt to avoid situations where the ... to be # added are longer than the omitted right paren, as in: # foreach my $item (@a_rather_long_variable_name_here) { # &whatever; # } ## end foreach my $item (@a_rather_long_variable_name_here... || ( $tokens_to_go[$i] eq ')' && ( ( $i + 1 <= $max_index_to_go && $block_type_to_go[ $i + 1 ] && $block_type_to_go[ $i + 1 ] eq $accumulating_text_for_block ) || ( $i + 2 <= $max_index_to_go && $block_type_to_go[ $i + 2 ] && $block_type_to_go[ $i + 2 ] eq $accumulating_text_for_block ) ) ) ) { # add an extra space at each newline if ( $i == 0 && $types_to_go[$i] ne 'b' ) { $leading_block_text .= SPACE; } # add the token text $leading_block_text .= $tokens_to_go[$i]; $leading_block_text_line_length = $new_line_length; } # show that text was truncated if necessary elsif ( $types_to_go[$i] ne 'b' ) { $leading_block_text_length_exceeded = 1; $leading_block_text .= '...'; } else { ## ok } } return; } ## end sub accumulate_block_text sub accumulate_csc_text { my ($self) = @_; # called once per output buffer when -csc is used. Accumulates # the text placed after certain closing block braces. # Defines and returns the following for this buffer: my $block_leading_text = EMPTY_STRING; # the leading text of the last '}' my $rblock_leading_if_elsif_text; my $i_block_leading_text = -1; # index of token owning block_leading_text my $block_line_count = 100; # how many lines the block spans my $terminal_type = 'b'; # type of last nonblank token my $i_terminal = 0; # index of last nonblank token my $terminal_block_type = EMPTY_STRING; # update most recent statement label $csc_last_label = EMPTY_STRING unless ($csc_last_label); if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] } my $block_label = $csc_last_label; # Loop over all tokens of this batch for my $i ( 0 .. $max_index_to_go ) { my $type = $types_to_go[$i]; my $block_type = $block_type_to_go[$i]; my $token = $tokens_to_go[$i]; $block_type = EMPTY_STRING unless ($block_type); # remember last nonblank token type if ( $type ne '#' && $type ne 'b' ) { $terminal_type = $type; $terminal_block_type = $block_type; $i_terminal = $i; } my $type_sequence = $type_sequence_to_go[$i]; if ( $block_type && $type_sequence ) { if ( $token eq '}' ) { # restore any leading text saved when we entered this block if ( defined( $block_leading_text{$type_sequence} ) ) { ( $block_leading_text, $rblock_leading_if_elsif_text ) = @{ $block_leading_text{$type_sequence} }; $i_block_leading_text = $i; delete $block_leading_text{$type_sequence}; $rleading_block_if_elsif_text = $rblock_leading_if_elsif_text; } if ( defined( $csc_block_label{$type_sequence} ) ) { $block_label = $csc_block_label{$type_sequence}; delete $csc_block_label{$type_sequence}; } # if we run into a '}' then we probably started accumulating # at something like a trailing 'if' clause..no harm done. if ( $accumulating_text_for_block && $levels_to_go[$i] <= $leading_block_text_level ) { my $lev = $levels_to_go[$i]; reset_block_text_accumulator(); } if ( defined( $block_opening_line_number{$type_sequence} ) ) { my $output_line_number = $self->get_output_line_number(); $block_line_count = $output_line_number - $block_opening_line_number{$type_sequence} + 1; delete $block_opening_line_number{$type_sequence}; } else { # Error: block opening line undefined for this line.. # This shouldn't be possible, but it is not a # significant problem. } } elsif ( $token eq '{' ) { my $line_number = $self->get_output_line_number(); $block_opening_line_number{$type_sequence} = $line_number; # set a label for this block, except for # a bare block which already has the label # A label can only be used on the next { if ( $block_type =~ /:$/ ) { $csc_last_label = EMPTY_STRING; } $csc_block_label{$type_sequence} = $csc_last_label; $csc_last_label = EMPTY_STRING; if ( $accumulating_text_for_block && $levels_to_go[$i] == $leading_block_text_level ) { if ( $accumulating_text_for_block eq $block_type ) { # save any leading text before we enter this block $block_leading_text{$type_sequence} = [ $leading_block_text, $rleading_block_if_elsif_text ]; $block_opening_line_number{$type_sequence} = $leading_block_text_line_number; reset_block_text_accumulator(); } else { # shouldn't happen, but not a serious error. # We were accumulating -csc text for block type # $accumulating_text_for_block and unexpectedly # encountered a '{' for block type $block_type. } } } else { ## should not get here DEVEL_MODE && Fault("token=$token should be '{' or '}' for block\n"); } } if ( $type eq 'k' && $csc_new_statement_ok && $is_if_elsif_else_unless_while_until_for_foreach{$token} && $token =~ /$closing_side_comment_list_pattern/ ) { $self->set_block_text_accumulator($i); } else { # note: ignoring type 'q' because of tricks being played # with 'q' for hanging side comments if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { $csc_new_statement_ok = ( $block_type || $type eq 'J' || $type eq ';' ); } if ( $type eq ';' && $accumulating_text_for_block && $levels_to_go[$i] == $leading_block_text_level ) { reset_block_text_accumulator(); } else { $self->accumulate_block_text($i); } } } # Treat an 'else' block specially by adding preceding 'if' and # 'elsif' text. Otherwise, the 'end else' is not helpful, # especially for cuddled-else formatting. if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { $block_leading_text = $self->make_else_csc_text( $i_terminal, $terminal_block_type, $block_leading_text, $rblock_leading_if_elsif_text ); } # if this line ends in a label then remember it for the next pass $csc_last_label = EMPTY_STRING; if ( $terminal_type eq 'J' ) { $csc_last_label = $tokens_to_go[$i_terminal]; } return ( $terminal_type, $i_terminal, $i_block_leading_text, $block_leading_text, $block_line_count, $block_label ); } ## end sub accumulate_csc_text sub make_else_csc_text { # create additional -csc text for an 'else' and optionally 'elsif', # depending on the value of switch # # = 0 add 'if' text to trailing else # = 1 same as 0 plus: # add 'if' to 'elsif's if can fit in line length # add last 'elsif' to trailing else if can fit in one line # = 2 same as 1 but do not check if exceed line length # # $rif_elsif_text = a reference to a list of all previous closing # side comments created for this if block # my ( $self, $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; my $csc_text = $block_leading_text; if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) { return $csc_text; } my $count = @{$rif_elsif_text}; return $csc_text unless ($count); my $if_text = '[ if' . $rif_elsif_text->[0]; # always show the leading 'if' text on 'else' if ( $block_type eq 'else' ) { $csc_text .= $if_text; } # see if that's all if ( $rOpts_closing_side_comment_else_flag == 0 ) { return $csc_text; } my $last_elsif_text = EMPTY_STRING; if ( $count > 1 ) { $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } } # tentatively append one more item my $saved_text = $csc_text; if ( $block_type eq 'else' ) { $csc_text .= $last_elsif_text; } else { $csc_text .= SPACE . $if_text; } # all done if no length checks requested if ( $rOpts_closing_side_comment_else_flag == 2 ) { return $csc_text; } # undo it if line length exceeded my $length = length($csc_text) + length($block_type) + length( $rOpts->{'closing-side-comment-prefix'} ) + $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; if ( $length > $maximum_line_length_at_level[$leading_block_text_level] ) { $csc_text = $saved_text; } return $csc_text; } ## end sub make_else_csc_text } ## end closure accumulate_csc_text { ## begin closure balance_csc_text # Some additional routines for handling the --closing-side-comments option my %matching_char; BEGIN { %matching_char = ( '{' => '}', '(' => ')', '[' => ']', '}' => '{', ')' => '(', ']' => '[', ); } ## end BEGIN sub balance_csc_text { # Append characters to balance a closing side comment so that editors # such as vim can correctly jump through code. # Simple Example: # input = ## end foreach my $foo ( sort { $b ... # output = ## end foreach my $foo ( sort { $b ...}) # NOTE: This routine does not currently filter out structures within # quoted text because the bounce algorithms in text editors do not # necessarily do this either (a version of vim was checked and # did not do this). # Some complex examples which will cause trouble for some editors: # while ( $mask_string =~ /\{[^{]*?\}/g ) { # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { # if ( $1 eq '{' ) { # test file test1/braces.pl has many such examples. my ($csc) = @_; # loop to examine characters one-by-one, RIGHT to LEFT and # build a balancing ending, LEFT to RIGHT. foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) { my $char = substr( $csc, $pos, 1 ); # ignore everything except structural characters next unless ( $matching_char{$char} ); # pop most recently appended character my $top = chop($csc); # push it back plus the mate to the newest character # unless they balance each other. $csc = $csc . $top . $matching_char{$char} unless $top eq $char; } # return the balanced string return $csc; } ## end sub balance_csc_text } ## end closure balance_csc_text sub add_closing_side_comment { my ( $self, $ri_first, $ri_last ) = @_; my $rLL = $self->[_rLL_]; # add closing side comments after closing block braces if -csc used my ( $closing_side_comment, $cscw_block_comment ); #--------------------------------------------------------------- # Step 1: loop through all tokens of this line to accumulate # the text needed to create the closing side comments. Also see # how the line ends. #--------------------------------------------------------------- my ( $terminal_type, $i_terminal, $i_block_leading_text, $block_leading_text, $block_line_count, $block_label ) = $self->accumulate_csc_text(); #--------------------------------------------------------------- # Step 2: make the closing side comment if this ends a block #--------------------------------------------------------------- my $have_side_comment = $types_to_go[$max_index_to_go] eq '#'; # if this line might end in a block closure.. if ( $terminal_type eq '}' # Fix 1 for c091, this is only for blocks && $block_type_to_go[$i_terminal] # ..and either && ( # the block is long enough ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) # or there is an existing comment to check || ( $have_side_comment && $rOpts->{'closing-side-comment-warnings'} ) ) # .. and if this is one of the types of interest && $block_type_to_go[$i_terminal] =~ /$closing_side_comment_list_pattern/ # .. but not an anonymous sub # These are not normally of interest, and their closing braces are # often followed by commas or semicolons anyway. This also avoids # possible erratic output due to line numbering inconsistencies # in the cases where their closing braces terminate a line. && $block_type_to_go[$i_terminal] ne 'sub' # ..and the corresponding opening brace must is not in this batch # (because we do not need to tag one-line blocks, although this # should also be caught with a positive -csci value) && !defined( $mate_index_to_go[$i_terminal] ) # ..and either && ( # this is the last token (line doesn't have a side comment) !$have_side_comment # or the old side comment is a closing side comment || $tokens_to_go[$max_index_to_go] =~ /$closing_side_comment_prefix_pattern/ ) ) { # then make the closing side comment text if ($block_label) { $block_label .= SPACE } my $token = "$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]"; # append any extra descriptive text collected above if ( $i_block_leading_text == $i_terminal ) { $token .= $block_leading_text; } $token = balance_csc_text($token) if $rOpts->{'closing-side-comments-balanced'}; $token =~ s/\s*$//; # trim any trailing whitespace # handle case of existing closing side comment if ($have_side_comment) { # warn if requested and tokens differ significantly if ( $rOpts->{'closing-side-comment-warnings'} ) { my $old_csc = $tokens_to_go[$max_index_to_go]; my $new_csc = $token; $new_csc =~ s/\s+//g; # trim all whitespace $old_csc =~ s/\s+//g; # trim all whitespace $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures # trim trailing '...' my $new_trailing_dots = $new_csc =~ s/\.\.\.$//; $old_csc =~ s/\.\.\.\s*$//; # Patch to handle multiple closing side comments at # else and elsif's. These have become too complicated # to check, so if we see an indication of # '[ if' or '[ # elsif', then assume they were made # by perltidy. if ( $block_type_to_go[$i_terminal] eq 'else' ) { if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } } elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } } else { ## ok: neither else or elsif } # if old comment is contained in new comment, # only compare the common part. if ( length($new_csc) > length($old_csc) ) { $new_csc = substr( $new_csc, 0, length($old_csc) ); } # if the new comment is shorter and has been limited, # only compare the common part. if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) { $old_csc = substr( $old_csc, 0, length($new_csc) ); } # any remaining difference? if ( $new_csc ne $old_csc ) { # just leave the old comment if we are below the threshold # for creating side comments if ( $block_line_count < $rOpts->{'closing-side-comment-interval'} ) { $token = undef; } # otherwise we'll make a note of it else { my $msg_line_number; my $K = $K_to_go[$i_terminal]; if ( defined($K) ) { $msg_line_number = $rLL->[$K]->[_LINE_INDEX_] + 1; } warning( "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n", $msg_line_number ); # save the old side comment in a new trailing block # comment my $timestamp = EMPTY_STRING; if ( $rOpts->{'timestamp'} ) { my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; $year += 1900; $month += 1; $timestamp = "$year-$month-$day"; } $cscw_block_comment = "## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]"; } } # No differences.. we can safely delete old comment if we # are below the threshold elsif ( $block_line_count < $rOpts->{'closing-side-comment-interval'} ) { # Since the line breaks have already been set, we have # to remove the token from the _to_go array and also # from the line range (this fixes issue c081). # Note that we can only get here if -cscw has been set # because otherwise the old comment is already deleted. $token = undef; my $ibeg = $ri_first->[-1]; my $iend = $ri_last->[-1]; if ( $iend > $ibeg && $iend == $max_index_to_go && $types_to_go[$max_index_to_go] eq '#' ) { $iend--; $max_index_to_go--; if ( $iend > $ibeg && $types_to_go[$max_index_to_go] eq 'b' ) { $iend--; $max_index_to_go--; } $ri_last->[-1] = $iend; } } else { ## above threshold, cannot delete } } # switch to the new csc (unless we deleted it!) if ($token) { my $len_tok = length($token); # NOTE: length no longer important my $added_len = $len_tok - $token_lengths_to_go[$max_index_to_go]; $tokens_to_go[$max_index_to_go] = $token; $token_lengths_to_go[$max_index_to_go] = $len_tok; my $K = $K_to_go[$max_index_to_go]; $rLL->[$K]->[_TOKEN_] = $token; $rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok; $summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len; } } # handle case of NO existing closing side comment else { # To avoid inserting a new token in the token arrays, we # will just return the new side comment so that it can be # inserted just before it is needed in the call to the # vertical aligner. $closing_side_comment = $token; } } return ( $closing_side_comment, $cscw_block_comment ); } ## end sub add_closing_side_comment ############################ # CODE SECTION 15: Summarize ############################ sub wrapup { # This is the last routine called when a file is formatted. # Flush buffer and write any informative messages my ( $self, $severe_error ) = @_; $self->flush(); my $file_writer_object = $self->[_file_writer_object_]; $file_writer_object->decrement_output_line_number() ; # fix up line number since it was incremented we_are_at_the_last_line(); my $max_depth = $self->[_maximum_BLOCK_level_]; my $at_line = $self->[_maximum_BLOCK_level_at_line_]; write_logfile_entry( "Maximum leading structural depth is $max_depth in input at line $at_line\n" ); my $added_semicolon_count = $self->[_added_semicolon_count_]; my $first_added_semicolon_at = $self->[_first_added_semicolon_at_]; my $last_added_semicolon_at = $self->[_last_added_semicolon_at_]; if ( $added_semicolon_count > 0 ) { my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING; my $what = ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; write_logfile_entry("$added_semicolon_count $what added:\n"); write_logfile_entry( " $first at input line $first_added_semicolon_at\n"); if ( $added_semicolon_count > 1 ) { write_logfile_entry( " Last at input line $last_added_semicolon_at\n"); } write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n"); write_logfile_entry("\n"); } my $deleted_semicolon_count = $self->[_deleted_semicolon_count_]; my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_]; my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_]; if ( $deleted_semicolon_count > 0 ) { my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING; my $what = ( $deleted_semicolon_count > 1 ) ? "semicolons were" : "semicolon was"; write_logfile_entry( "$deleted_semicolon_count unnecessary $what deleted:\n"); write_logfile_entry( " $first at input line $first_deleted_semicolon_at\n"); if ( $deleted_semicolon_count > 1 ) { write_logfile_entry( " Last at input line $last_deleted_semicolon_at\n"); } write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n"); write_logfile_entry("\n"); } my $embedded_tab_count = $self->[_embedded_tab_count_]; my $first_embedded_tab_at = $self->[_first_embedded_tab_at_]; my $last_embedded_tab_at = $self->[_last_embedded_tab_at_]; if ( $embedded_tab_count > 0 ) { my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING; my $what = ( $embedded_tab_count > 1 ) ? "quotes or patterns" : "quote or pattern"; write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n"); write_logfile_entry( "This means the display of this script could vary with device or software\n" ); write_logfile_entry(" $first at input line $first_embedded_tab_at\n"); if ( $embedded_tab_count > 1 ) { write_logfile_entry( " Last at input line $last_embedded_tab_at\n"); } write_logfile_entry("\n"); } my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_]; my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_]; my $tabbing_disagreement_count = $self->[_tabbing_disagreement_count_]; my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_]; if ($first_tabbing_disagreement) { write_logfile_entry( "First indentation disagreement seen at input line $first_tabbing_disagreement\n" ); } my $first_btd = $self->[_first_brace_tabbing_disagreement_]; if ($first_btd) { my $msg = "First closing brace indentation disagreement started at input line $first_btd\n"; write_logfile_entry($msg); # leave a hint in the .ERR file if there was a brace error if ( get_saw_brace_error() ) { warning("NOTE: $msg") } } my $in_btd = $self->[_in_brace_tabbing_disagreement_]; if ($in_btd) { my $msg = "Ending with brace indentation disagreement which started at input line $in_btd\n"; write_logfile_entry($msg); # leave a hint in the .ERR file if there was a brace error if ( get_saw_brace_error() ) { warning("NOTE: $msg") } } if ($in_tabbing_disagreement) { my $msg = "Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"; write_logfile_entry($msg); } else { if ($last_tabbing_disagreement) { write_logfile_entry( "Last indentation disagreement seen at input line $last_tabbing_disagreement\n" ); } else { write_logfile_entry("No indentation disagreement seen\n"); } } if ($first_tabbing_disagreement) { write_logfile_entry( "Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n" ); } write_logfile_entry("\n"); my $vao = $self->[_vertical_aligner_object_]; $vao->report_anything_unusual(); $file_writer_object->report_line_length_errors(); # Define the formatter self-check for convergence. $self->[_converged_] = $severe_error || $file_writer_object->get_convergence_check() || $rOpts->{'indent-only'}; return; } ## end sub wrapup } ## end package Perl::Tidy::Formatter 1;