/* The following comment was added by Kari Laitinen on 15.3.2005 This is one of the Prolog programs of which an experimental software maintenance tool called InName is made. InName is a disabbreviation tool with which it is possible to convert abbreviated names of C/C++ programs to more understandable, natural names. The Prolog source programs of the InName tool are now declared "Open Source" programs. People who have contributed to the original development of these programs include Kari Laitinen http://www.naturalprogramming.com Neil Rowe U.S. Naval Postgraduate School Markku Heikkila Currently works at Nokia. Jorma Taramaa Currently works at Nokia. The InName tool was developed at VTT Electronics, a division of the Technical Research Centre of Finland. The work was funded by VTT Electronics and the EU. It is the wish of the original developers that the above names will be mentioned if these programs are exploited in the development of other disabbreviation tools. A description of the InName tool and theoretical discussion related to it can be found at Laitinen, K., Taramaa, J., Heikkila, M., and Rowe, N. C. Enhancing Maintainability of Source Programs through Disabbreviation. The Journal of Systems and Software, Vol. 37, No. 2, 1997, pp. 117 - 128. The text of the original file begins below. */ /*--------------------------------------------------------------------------- VTT Electronics Quintus Prolog source program Embedded Software AMES - project Kari Laitinen File: inname_first_pass.pro Version: 0.1 Status: draft Accepted by: File history: 15.7.1994 v0.0 File separated from main program Kari Laitinen 28.10.1994 v0.1 Last modification Kari Laitinen ---------------------------------------------------------------------------*/ :- no_style_check(single_var). :- ensure_loaded(library(caseconv)). :- multifile reserved_word/1. /* This is static. */ :- multifile unknown_name/4, last_found_name_like_string/1, found_name/2, found_word/2. 
:- dynamic unknown_name/4,
           last_found_name_like_string/1,
           found_name/2,
           found_word/2.
:- dynamic word_of_a_name/1.

/*---------------------------------------------------------------------------
   FIRST PASS: " L E X I N G "
----------------------------------------------------------------------------*/

/* handle_string_values_and_include_files
   Skips the rest of a double-quoted item.  When the most recently
   stored name-like string is 'include', characters are skipped until
   a double quote is visible and that quote is then consumed.
   Otherwise the item is skipped as an ordinary string value. */
handle_string_values_and_include_files :-
    last_found_name_like_string( Previous_name ),
    Previous_name == 'include',
    read_until_double_quote_visible,
    get0( _Closing_double_quote ),
    !.
handle_string_values_and_include_files :-
    read_until_string_value_end,
    !.

/* increment_line_number_if_necessary( +Character_code )
   Bumps the line_number counter when the code is 10 (newline);
   succeeds without doing anything for every other code. */
increment_line_number_if_necessary( Code ) :-
    Code =:= 10,
    increment_counter( line_number ),
    !.
increment_line_number_if_necessary( _Any_other_code ) :- !.

/* include_command_starts( +Next_character )
   Succeeds when the peeked character is '<' and the most recently
   stored name-like string is 'include'; the '<' is then consumed. */
include_command_starts( Peeked_code ) :-
    Peeked_code =:= 60,                                   /* < */
    last_found_name_like_string( Previous_name ),
    Previous_name == 'include',
    get0( _Opening_angle_bracket ),
    !.

/* numeric_value_starts( +Next_character )
   Succeeds when the peeked character is a digit.  Nothing is consumed. */
numeric_value_starts( Peeked_code ) :-
    character_is_a_digit( Peeked_code ),
    !.

/* read_next_name( -Name_string )
   Delivers the next name from the current input that
   read_possible_name/2 accepts.  Fails at end of file. */
read_next_name( _Name ) :-
    at_end_of_file,
    !,
    fail.
read_next_name( Name ) :-
    peek_char( Peeked_code ),
    /* Comments, quoted strings, character values, include commands
       and numeric values are read away before a possible name. */
    read_possible_comments_etc( Peeked_code ),
    get0( Code ),
    increment_line_number_if_necessary( Code ),
    name_starting_character( Code ),
    read_possible_name( Code, Name ),
    !.
read_next_name( Name ) :-
    /* The attempt above did not yield a name: try again from the
       current position of the input. */
    !,
    read_next_name( Name ).

/* read_possible_name( +First_character, -Name_string )
   Reads the remaining name characters, converts them with name/2 and
   records the result as the last found name-like string.  The
   recording happens before the reserved word test, so reserved words
   are remembered too although the predicate fails for them. */
read_possible_name( First_code, Name ) :-
    read_rest_of_name_characters( First_code, Codes ),
    name( Name, Codes ),
    retractall( last_found_name_like_string( _ ) ),
    assert( last_found_name_like_string( Name ) ),
    \+ reserved_word( Name ),
    !.
/* read_rest_of_name_characters( +First_character, -Character_list )
   Collects the character codes of a name.  First_character has
   already been consumed by the caller.  As long as the peeked
   character satisfies valid_name_body_character/1 it is consumed and
   added to the list.  The first character that does not qualify is
   left in the input. */
read_rest_of_name_characters( First_character,
                              [ First_character | Rest_of_name ] ) :-
    peek_char( Next_character ),
    valid_name_body_character( Next_character ),
    get0( Name_body_character ),
    !,
    read_rest_of_name_characters( Name_body_character, Rest_of_name ).
read_rest_of_name_characters( Last_character, [ Last_character | [] ] ) :- !.

/* read_possible_comments_etc( +Next_character )
   Next_character is the peeked (not yet consumed) character.  Each
   clause tries one kind of item: comment, quoted string value,
   quoted character value, include command, numeric value.  When an
   item is recognised it is read away, the following character is
   peeked, and the predicate recurses.  The last clause always
   succeeds.
   NOTE: the "starts" tests may consume input even when their clause
   fails afterwards, so the order of the clauses and of the goals
   inside them matters. */
read_possible_comments_etc( Next_character ) :-
    comment_starts( Next_character ),
    read_until_comment_end,
    peek_char( Character_following_the_comment ),
    !,
    read_possible_comments_etc( Character_following_the_comment ).
read_possible_comments_etc( Next_character ) :-
    quoted_string_value_starts( Next_character ),
    handle_string_values_and_include_files,
    peek_char( Character_following_the_string_value ),
    !,
    read_possible_comments_etc( Character_following_the_string_value ).
read_possible_comments_etc( Next_character ) :-
    quoted_character_value_starts( Next_character ),
    read_until_character_value_end,
    peek_char( Character_following_the_character_value ),
    !,
    read_possible_comments_etc( Character_following_the_character_value ).
read_possible_comments_etc( Next_character ) :-
    include_command_starts( Next_character ),
    read_until_include_command_end,
    peek_char( Character_following_the_include_command ),
    !,
    read_possible_comments_etc( Character_following_the_include_command ).
read_possible_comments_etc( Next_character ) :-
    numeric_value_starts( Next_character ),
    read_until_numeric_value_end,
    peek_char( Character_following_the_numeric_value ),
    !,
    read_possible_comments_etc( Character_following_the_numeric_value ).
read_possible_comments_etc( Next_character) :- !.

/* read_possible_type_specifying_characters
   Consumes the type specifier characters that may follow a C numeric
   value, as recognised by
   character_is_a_type_specifier_of_c_numeric_value/1.  At most two
   are consumed.  When only one is present, the first clause fails
   after having consumed it and the second clause succeeds.  The
   predicate always succeeds. */
read_possible_type_specifying_characters :-
    peek_char( Next_character ),
    character_is_a_type_specifier_of_c_numeric_value( Next_character ),
    get0( First_type_specifying_character ),
    peek_char( Character_after_first_type_specifier ),
    character_is_a_type_specifier_of_c_numeric_value( Character_after_first_type_specifier ),
    get0( Second_type_specifying_character ),
    !.
read_possible_type_specifying_characters :- !.

/* read_until_character_value_end
   Consumes input until a single quote (code 39) that is not part of
   a backslash sequence has been read.  Each attempt consumes at
   least one character; when that attempt does not end at a single
   quote the second clause simply tries again. */
read_until_character_value_end :-
    get0( Character_which_can_be_a_backslash ),
    read_until_character_value_end__handle_backslash(
        Character_which_can_be_a_backslash,
        Character_following_the_possible_backslash_sequence ),
    Character_following_the_possible_backslash_sequence =:= 39, /* ' */
    !.
read_until_character_value_end :-
    !,
    read_until_character_value_end.

/* read_until_character_value_end__handle_backslash( +Character,
                                      -Character_after_backslash_sequence )
   When Character is a backslash (code 92), the character after it is
   consumed and discarded, one more character is consumed, and the
   test is repeated on that one.  The result is the first character
   that is not a backslash. */
read_until_character_value_end__handle_backslash(
        Character_which_can_be_a_backslash,
        Character_after_backslash_sequence ) :-
    Character_which_can_be_a_backslash =:= 92, /* \ */
    get0( Character_following_the_backslash ),
    get0( Character_second_to_the_backslash ),
    !,
    read_until_character_value_end__handle_backslash(
        Character_second_to_the_backslash,
        Character_after_backslash_sequence ).
read_until_character_value_end__handle_backslash( Non_backslash_character,
                                                  Non_backslash_character ) :- !.

/* read_until_comment_end
   Reads the rest of a comment (the caller has consumed the comment
   opener) and hands the collected character codes to
   process_comment_information/1. */
read_until_comment_end :-
    read_until_comment_end( List_of_characters_from_the_comment ),
    process_comment_information( List_of_characters_from_the_comment ),
    !.

/* read_until_comment_end( -Character_list )
   Consumes one character, updates the line counter when it is a
   newline, and lets read_until_comment_end__make_list/2 decide
   whether the comment has ended. */
read_until_comment_end( Character_list ) :-
    get0( Character ),
    increment_line_number_if_necessary( Character ),
    !,
    read_until_comment_end__make_list( Character, Character_list ).

/* read_until_comment_end__make_list( +Character, -Character_list )
   Adds Character to the list and continues reading unless
   at_comment_end/1 succeeds.  at_comment_end/1 consumes the closing
   slash itself, and input consumed inside the negation stays
   consumed, so the returned list ends with the asterisk and does not
   contain the slash. */
read_until_comment_end__make_list( Character, [Character | Character_list] ) :-
    \+ at_comment_end( Character ),
    !,
    read_until_comment_end( Character_list ).
read_until_comment_end__make_list( Character, [ Character| [] ] ) :- !.

/* read_until_double_quote_visible
   Consumes characters until the peeked character is a double quote
   (code 34).  The quote itself is left in the input. */
read_until_double_quote_visible :-
    peek_char( Next_character ),
    Next_character =:= 34,
    !.
read_until_double_quote_visible :-
    get0( Current_character ),
    !,
    read_until_double_quote_visible.

/* read_until_hexadecimal_value_end
   Consumes hexadecimal digits as long as the peeked character
   satisfies character_is_a_hexadecimal_digit/1, then consumes the
   possible type specifier characters. */
read_until_hexadecimal_value_end :-
    peek_char( Next_character ),
    character_is_a_hexadecimal_digit( Next_character ),
    get0( Hexadecimal_digit ),
    !,
    read_until_hexadecimal_value_end.
read_until_hexadecimal_value_end :-
    read_possible_type_specifying_characters,
    !.
/* read_until_include_command_end
   Handles the case when 'include <' has been seen: consumes
   characters up to and including the closing '>' (code 62). */
read_until_include_command_end :-
    get0( New_character ),
    New_character =:= 62, /* > */
    !.
read_until_include_command_end :-
    !,
    read_until_include_command_end.

/* read_until_newline_visible
   Consumes characters until the peeked character is a newline
   (code 10).  The newline is left in the input. */
read_until_newline_visible :-
    peek_char( Next_character ),
    Next_character =:= 10,
    !.
read_until_newline_visible :-
    get0( Current_character ),
    !,
    read_until_newline_visible.

/* read_until_non_number_visible
   Consumes digits until the peeked character is not a digit. */
read_until_non_number_visible :-
    peek_char( Next_character ),
    \+ character_is_a_digit( Next_character ),
    !.
read_until_non_number_visible :-
    get0( Numeric_character ),
    !,
    read_until_non_number_visible.

/* read_until_numeric_value_end
   Reads a C numeric value away.  The three clauses cooperate through
   the input they consume:
     1. the first digit is always consumed; when it is 0 followed by
        X or x the value is read as a hexadecimal value;
     2. otherwise the remaining digits are consumed and, when an
        exponent marker follows, the marker, the character after it
        and the exponent digits are consumed as well.  This clause
        always ends in failure on purpose so that
     3. the possible type specifier characters are consumed last.
   Both e and E are accepted as the exponent marker.  Previously only
   e was accepted, so the exponent of a value such as 1E5 was left in
   the input and later read as the name E5. */
read_until_numeric_value_end :-
    get0( Current_character ),
    peek_char( Next_character ),
    Current_character =:= 48,            /* 0 */
    ( Next_character =:= 88              /* X */
    ; Next_character =:= 120 ),          /* x */
    get0( Hexadecimal_value_indicator ),
    read_until_hexadecimal_value_end,
    !.
read_until_numeric_value_end :-
    read_until_non_number_visible,
    peek_char( Next_character ),
    ( Next_character =:= 101             /* e */
    ; Next_character =:= 69 ),           /* E */
    get0( Exponent_marker ),
    /* When a C program is correct the next character must be a
       digit, "-" or "+".  We do not, however, have to care about
       that character. */
    get0( A_number_or_the_sign ),
    read_until_non_number_visible,
    fail.
read_until_numeric_value_end :-
    read_possible_type_specifying_characters,
    !.

/* read_until_space_or_tab_visible
   Consumes characters until the peeked character is a space
   (code 32) or a tab (code 9). */
read_until_space_or_tab_visible :-
    peek_char( Next_character ),
    (( Next_character =:= 32 ) ; ( Next_character =:= 9 )),
    !.
read_until_space_or_tab_visible :-
    get0( Current_character ),
    !,
    read_until_space_or_tab_visible.

/* read_until_string_value_end
   Consumes input until a double quote (code 34) that is not part of
   a backslash sequence has been read.  When reading C strings we
   must handle the cases when they look like:
   "\\\\\\\\"   "\"\"\""   etc. */
read_until_string_value_end :-
    get0( Character_which_can_be_a_backslash ),
    read_until_string_value_end__handle_backslash(
        Character_which_can_be_a_backslash,
        Character_following_the_possible_backslash_sequence ),
    Character_following_the_possible_backslash_sequence =:= 34, /* " */
    !.
read_until_string_value_end :-
    !,
    read_until_string_value_end.
/* read_until_string_value_end__handle_backslash( +Character,
                                      -Character_after_backslash_sequence )
   When Character is a backslash (code 92), the character after it is
   consumed and discarded, one more character is consumed, and the
   test is repeated on that one.  The result is the first character
   that is not a backslash. */
read_until_string_value_end__handle_backslash(
        Character_which_can_be_a_backslash,
        Character_after_backslash_sequence ) :-
    Character_which_can_be_a_backslash =:= 92, /* \ */
    get0( Character_following_the_backslash ),
    get0( Character_second_to_the_backslash ),
    !,
    read_until_string_value_end__handle_backslash(
        Character_second_to_the_backslash,
        Character_after_backslash_sequence ).
read_until_string_value_end__handle_backslash( Non_backslash_character,
                                               Non_backslash_character ) :- !.

/* at_comment_end( +Character )
   Succeeds when Character is an asterisk (code 42) and the peeked
   character is a slash (code 47).  The slash is then consumed. */
at_comment_end( Character ) :-
    Character = 42,
    peek_char( Next_character ),
    Next_character = 47,
    get0( Forward_slash_character ),
    !.

/* comment_starts( +Next_character )
   Succeeds when the peeked character is a slash (code 47) followed
   by an asterisk (code 42); both are consumed.  Note that the slash
   is consumed even when no asterisk follows and the predicate
   fails. */
comment_starts( Next_character ) :-
    Next_character = 47,
    get0( Forward_slash_character ),
    peek_char( Character_following_the_forward_slash ),
    !,
    Character_following_the_forward_slash = 42,
    get0( Asterisk_character ).

/* quoted_string_value_starts( +Next_character )
   Succeeds when the peeked character is a double quote (code 34),
   which is then consumed. */
quoted_string_value_starts( Next_character ) :-
    Next_character = 34,
    get0( Double_quote_character ).

/* quoted_character_value_starts( +Next_character )
   Succeeds when the peeked character is a single quote (code 39),
   which is then consumed. */
quoted_character_value_starts( Next_character ) :-
    Next_character = 39,
    get0( Single_quote_character ).

/* search_more_unknown_names
   Reads names one by one, decomposes each of them and updates the
   name statistics, until no further name can be processed.  Always
   succeeds. */
search_more_unknown_names :-
    read_next_name( Name_as_string ),
    decompose_a_name_and_check_its_words( Name_as_string ),
    update_name_occurrence_statistics( Name_as_string ),
    !,
    search_more_unknown_names.
search_more_unknown_names :- !.

/* search_unknown_names_in_program_file( +File_name_as_string )
   Runs the first pass over one program file: the file is made the
   current input, the line counter is set to the first line, all
   names are processed and the file is closed. */
search_unknown_names_in_program_file( File_name_as_string ) :-
    /* A fact left behind by a previously processed file must not
       survive next to the dummy one, because the include tests would
       find the stale fact as well.  read_possible_name/2 uses the
       same retractall-then-assert pattern. */
    retractall( last_found_name_like_string( _ ) ),
    assert( last_found_name_like_string( 'dummy_name' ) ),
    reset_counter( line_number ),
    see( File_name_as_string ),
    /* Something needs to be done if the file cannot be opened
       for some reason. */
    increment_counter( line_number ),
    search_more_unknown_names,
    seen,
    !.

/*----------------------------------------------------------------------------
   FIRST PASS: Decomposing different types of names and putting
   the unknown names into the database.
----------------------------------------------------------------------------*/

/* check_for_unknown_words( +List_of_words, -List_of_unknown_words )
   Returns, in their original order, those words whose lowercased
   form is neither a dictionary_word/1 nor a domain_word/1.
   The first clause commits with a cut as soon as a word has been
   found to be unknown.  Without that cut the catch-all second clause
   could, on backtracking, skip a word that is in fact unknown and
   deliver a shorter, wrong list.  The output list is built after the
   cut so that the commitment does not depend on the second
   argument. */
check_for_unknown_words( [ First_word | Rest_of_words ],
                         List_of_unknown_words ) :-
    lower( First_word, Lowercased_first_word ),
    \+ dictionary_word( Lowercased_first_word ),
    \+ domain_word( Lowercased_first_word ),
    !,
    List_of_unknown_words = [ First_word | Rest_of_unknown_words ],
    check_for_unknown_words( Rest_of_words, Rest_of_unknown_words ).
check_for_unknown_words( [ _Known_word | Rest_of_words ],
                         List_of_unknown_words ) :-
    check_for_unknown_words( Rest_of_words, List_of_unknown_words ).
check_for_unknown_words( [], [] ) :- !.

/* decompose_a_name_and_check_its_words( +Name_as_string )
   First clause: the name is already stored as an unknown name.  Its
   word statistics are updated and the current line number is
   appended to the stored list of line numbers; the stored fact is
   replaced by the updated one.
   Second clause: the name is decomposed into words, the words are
   decapitalized, the word statistics are updated, the unknown words
   are picked out and update_database_with_unknown_name/3 decides
   whether the name is stored. */
decompose_a_name_and_check_its_words( Name_as_string ) :-
    unknown_name( Name_as_string,
                  List_of_line_numbers,
                  List_of_all_words,
                  List_of_unknown_words ),
    /* When a string already has been found to be an unknown name
       there is no need to decompose it any more. */
    update_word_occurrence_statistics( List_of_all_words ),
    read_counter( line_number, Current_line_number ),
    append( List_of_line_numbers,
            [ Current_line_number ],
            New_list_of_line_numbers ),
    retract( unknown_name( Name_as_string, _, _, _ ) ),
    assertz( unknown_name( Name_as_string,
                           New_list_of_line_numbers,
                           List_of_all_words,
                           List_of_unknown_words ) ).
decompose_a_name_and_check_its_words( Name_as_string ) :-
    name( Name_as_string, Name_character_list ),
    get_rid_of_disturbing_underscores( Name_character_list,
                                       Clean_character_list ),
    decompose_a_name( Clean_character_list, List_of_words ),
    make_list_of_words_non_capitalized( List_of_words,
                                        List_of_decapitalized_words ),
    update_word_occurrence_statistics( List_of_decapitalized_words ),
    check_for_unknown_words( List_of_decapitalized_words,
                             List_of_unknown_words ),
    update_database_with_unknown_name( Name_as_string,
                                       List_of_decapitalized_words,
                                       List_of_unknown_words ).

/* The first rules handle the case when the list_of_unknown_words is empty. When the name seems to be a capitalized name we will add it in the database, though it would not contain any unknown words. 
The reason for this is that the tool attempts to make all names underscored. */

/* update_database_with_unknown_name( +Name, +Words, +Unknown_words )
   Stores the name as an unknown_name/4 fact together with the current
   line number.  A name without unknown words is stored only when
   name_can_be_a_capitalized_name/1 accepts it. */
update_database_with_unknown_name( Name, Words, [] ) :-
    name_can_be_a_capitalized_name( Name ),
    read_counter( line_number, Line ),
    /* Whether assertz or asserta is faster here has not been tested. */
    assertz( unknown_name( Name, [ Line ], Words, [] ) ),
    !.
update_database_with_unknown_name( _, _, [] ) :- !.
update_database_with_unknown_name( Name, Words, Unknown_words ) :-
    read_counter( line_number, Line ),
    /* Whether assertz or asserta is faster here has not been tested. */
    assertz( unknown_name( Name, [ Line ], Words, Unknown_words ) ),
    !.

/* calculate_and_print_name_occurrence_statistics
   Counts every name that read_next_name/1 delivers and prints the
   statistics once no further name is available. */
calculate_and_print_name_occurrence_statistics :-
    read_next_name( Name ),
    update_name_occurrence_statistics( Name ),
    calculate_and_print_name_occurrence_statistics.
calculate_and_print_name_occurrence_statistics :-
    print_name_occurrence_statistics.

/* update_name_occurrence_statistics( +Name_string )
   Adds one to the stored occurrence count of the name.  A name seen
   for the first time gets the count 1 and also increments the
   counter number_of_found_names. */
update_name_occurrence_statistics( Name ) :-
    retract( found_name( Name, Old_count ) ),
    New_count is Old_count + 1,
    asserta( found_name( Name, New_count ) ),
    !.
update_name_occurrence_statistics( Name ) :-
    asserta( found_name( Name, 1 ) ),
    increment_counter( number_of_found_names ),
    !.

/* update_word_occurrence_statistics( +List_of_words )
   Does for every word of the list, in lowercased form, what
   update_name_occurrence_statistics/1 does for a name; the facts are
   found_word/2 and the counter is number_of_found_words. */
update_word_occurrence_statistics( [ Word | Words ] ) :-
    lower( Word, Lowercased_word ),
    retract( found_word( Lowercased_word, Old_count ) ),
    New_count is Old_count + 1,
    asserta( found_word( Lowercased_word, New_count ) ),
    !,
    update_word_occurrence_statistics( Words ),
    !.
update_word_occurrence_statistics( [ Word | Words ] ) :-
    lower( Word, Lowercased_word ),
    asserta( found_word( Lowercased_word, 1 ) ),
    increment_counter( number_of_found_words ),
    !,
    update_word_occurrence_statistics( Words ),
    !.
update_word_occurrence_statistics( [] ) :- !.

/* print_name_occurrence_statistics
   The first clause is a failure driven loop over all found_name/2
   facts: it prints the count and the name, counts the name, and
   counts names with fewer than three occurrences as rarely used.
   The second clause prints the two totals. */
print_name_occurrence_statistics :-
    found_name( Name, Count ),
    write( Count ),
    write( '   ' ),
    write( Name ),
    nl,
    increment_counter( total_number_of_names ),
    Count < 3,
    increment_counter( number_of_rarely_used_names ),
    fail.
print_name_occurrence_statistics :-
    nl,
    write( 'total_number_of_names = ' ),
    write_counter( total_number_of_names ),
    nl,
    write( 'number_of_rarely_used_names = '),
    write_counter( number_of_rarely_used_names ).

/* list_all_names_from_the_file
   Writes every name delivered by read_next_name/1 on a line of its
   own. */
list_all_names_from_the_file :-
    read_next_name( Name ),
    write( Name ),
    nl,
    list_all_names_from_the_file.
list_all_names_from_the_file.

/*-----------------------------------------------------------------------------
   DECOMPOSING A NAME. GRAMMAR OF NAMES.
-----------------------------------------------------------------------------*/

/* decompose_a_name( +Name_character_list, -List_of_words )
   Splits the character codes of a name into words by means of the
   grammar below.  A name that the grammar cannot parse is returned
   as one single word. */
decompose_a_name( Codes, Words ) :-
    phrase( grammatical_name( Words ), Codes ),
    !.
decompose_a_name( Codes, Words ) :-
    name( Whole_name, Codes ),
    Words = [ Whole_name ],
    !.

/* A grammatical name is one or more underscoreless word sequences
   separated by single underscores. */
grammatical_name( Words ) -->
    underscoreless_word_sequence( Words ).
grammatical_name( Words ) -->
    underscoreless_word_sequence( Leading_words ),
    "_",
    grammatical_name( Remaining_words ),
    { append( Leading_words, Remaining_words, Words ) }.

/* The alternatives are tried in the order given here:
     lower   UPPER   Cap...   lowerCap...   UPPERCap...   Cap...UPPER
     lowerCap...UPPER   lowerUPPER   word starting with a digit */
underscoreless_word_sequence( [ Word ] ) -->
    lowercase_word( Word ).
underscoreless_word_sequence( [ Word ] ) -->
    uppercase_word( Word ).
underscoreless_word_sequence( Words ) -->
    list_of_capitalized_words( Words ).
underscoreless_word_sequence( [ Word | Capitalized_words ] ) -->
    lowercase_word( Word ),
    list_of_capitalized_words( Capitalized_words ).
underscoreless_word_sequence( [ Word | Capitalized_words ] ) -->
    uppercase_word( Word ),
    list_of_capitalized_words( Capitalized_words ).
underscoreless_word_sequence( Words ) -->
    list_of_capitalized_words( Capitalized_words ),
    uppercase_word( Word ),
    { append( Capitalized_words, [ Word ], Words ) }.
underscoreless_word_sequence( Words ) -->
    lowercase_word( First_word ),
    list_of_capitalized_words( Capitalized_words ),
    uppercase_word( Last_word ),
    { append( [ First_word | Capitalized_words ], [ Last_word ], Words ) }.
underscoreless_word_sequence( [ First_word, Last_word ] ) -->
    lowercase_word( First_word ),
    uppercase_word( Last_word ).
underscoreless_word_sequence( [ Word ] ) -->
    word_starting_with_a_number( Word ).

/* One or more capitalized words. */
list_of_capitalized_words( [ Word ] ) -->
    capitalized_word( Word ).
list_of_capitalized_words( [ Word | Trailing_words ] ) -->
    capitalized_word( Word ),
    list_of_capitalized_words( Trailing_words ).
/* capitalized_word( -Word )
   An uppercase letter followed by at least one lowercase letter or
   digit, e.g. "Name" or "X1". */
capitalized_word( Word ) -->
    uppercase_letter( Initial ),
    lowercase_letter_or_number( Second ),
    list_of_lowercase_letters_or_numbers( Trailing_codes ),
    { name( Word, [ Initial, Second | Trailing_codes ] ) }.

/* lowercase_word( -Word )
   A lowercase letter followed by any number of lowercase letters or
   digits. */
lowercase_word( Word ) -->
    lowercase_letter( Initial ),
    list_of_lowercase_letters_or_numbers( Trailing_codes ),
    { name( Word, [ Initial | Trailing_codes ] ) }.

/* Zero or more lowercase letters or digits.  The longest match is
   tried first. */
list_of_lowercase_letters_or_numbers( [ Code | Codes ] ) -->
    lowercase_letter_or_number( Code ),
    list_of_lowercase_letters_or_numbers( Codes ).
list_of_lowercase_letters_or_numbers( [] ) -->
    [].

/* A single character code that is a lowercase letter or a digit. */
lowercase_letter_or_number( Code ) -->
    [ Code ],
    { ( character_is_a_lowercase_letter( Code )
      ; character_is_a_digit( Code ) ) }.

/* A single character code that is a lowercase letter. */
lowercase_letter( Code ) -->
    [ Code ],
    { character_is_a_lowercase_letter( Code ) }.

/* uppercase_word( -Word )
   An uppercase letter followed by any number of uppercase letters or
   digits. */
uppercase_word( Word ) -->
    uppercase_letter( Initial ),
    list_of_uppercase_letters_or_numbers( Trailing_codes ),
    { name( Word, [ Initial | Trailing_codes ] ) }.
/* Zero or more uppercase letters or digits.  The longest match is
   tried first. */
list_of_uppercase_letters_or_numbers( [ Code | Codes ] ) -->
    uppercase_letter_or_number( Code ),
    list_of_uppercase_letters_or_numbers( Codes ).
list_of_uppercase_letters_or_numbers( [] ) -->
    [].

/* A single character code that is an uppercase letter or a digit. */
uppercase_letter_or_number( Code ) -->
    [ Code ],
    { ( character_is_an_uppercase_letter( Code )
      ; character_is_a_digit( Code ) ) }.

/* A single character code that is an uppercase letter. */
uppercase_letter( Code ) -->
    [ Code ],
    { character_is_an_uppercase_letter( Code ) }.

/* word_starting_with_a_number( -Word )
   A digit followed by any characters other than underscores.
   atom_chars/2 must be used here instead of name/2: name/2 tries to
   convert to a number first, and this causes an error for a word
   starting with a number. */
word_starting_with_a_number( Word ) -->
    a_digit( Digit ),
    list_of_any_characters_except_underscores( Trailing_codes ),
    { atom_chars( Word, [ Digit | Trailing_codes ] ) }.

/* Zero or more characters other than underscores.  The shortest
   match is tried first. */
list_of_any_characters_except_underscores( [] ) -->
    [].
list_of_any_characters_except_underscores( [ Code | Codes ] ) -->
    any_character_except_underscore( Code ),
    list_of_any_characters_except_underscores( Codes ).

/* A single character code other than 95 (underscore). */
any_character_except_underscore( Code ) -->
    [ Code ],
    { Code =\= 95 }.

/* A single character code that is a digit. */
a_digit( Code ) -->
    [ Code ],
    { character_is_a_digit( Code ) }.