#!/bin/sh
# vim: ts=4 sw=4 et ft=sh
#
# This script is intended to summarize the contents of the statistics files
# generated by compiler/prog_item_stats.m. These contain a few lines
# of the form
#
#   MODULE modulename
#
# for context, but by far the bulk of their content (and all of their useful
# information) consists of lines of the form
#
#   section item_or_goal_category count
#
# which give the number of times a given kind of item or goal occurs in the
# given section.
#
# This script computes and prints, for each category in each section, the
# total of these counts and the largest single count.

# Option defaults, overridable from the command line:
#
#   -e  Print a message for each malformed line. By default such lines,
#       which can appear when two or more compiler invocations print out
#       their statistics at the same time, are silently ignored.
#
#   -z  Print item and goal categories even if their total count in a
#       section is zero. By default such categories are not printed.
#
# Option processing stops at the first argument that is neither -e nor -z.
ignore_errors=1
print_zero=0
while [ "$#" -gt 0 ]; do
    case "$1" in
        -e) ignore_errors=0 ;;
        -z) print_zero=1 ;;
        *)  break ;;
    esac
    # Only reached for a recognized option; consume it.
    shift
done

# After option processing, exactly one argument must remain: the name of
# the statistics file to summarize.
if test "$#" -ne 1
then
    # The usage message is a diagnostic, not part of the report, so it goes
    # to stderr, where it cannot be mistaken for (or piped along as) output.
    echo "usage: item_stats [-e] [-z] stats_filename" >&2
    exit 1
fi

filename="$1"

# summarize_item_stats IGNORE_ERRORS PRINT_ZERO
#
# Reads raw statistics lines on stdin and writes to stdout, for each
# section/category pair, one line of the form
#
#   sec_num cat_kind section category total maximum
#
# where sec_num and cat_kind are numeric sort keys, total is the sum of the
# counts on all the input lines for that pair, and maximum is the largest
# count on any single one of those lines. Every count is also credited to
# the pseudo-section "any_section". Pairs whose total is zero are printed
# only if PRINT_ZERO is nonzero. If IGNORE_ERRORS is zero, each input line
# that is neither a MODULE line nor a three-field count line is reported
# (also on stdout) as "unexpected line in module ...".
summarize_item_stats() {
    gawk -v ignore_errors="$1" -v print_zero="$2" '
    BEGIN {
        # Sort keys that put the known sections in their intended order.
        # Sections not listed here get the key 41, which puts them last.
        sec_order["src_int"] = 1;
        sec_order["src_impl"] = 2;
        sec_order["src_impl_sub"] = 3;
        sec_order["int_imported"] = 11;
        sec_order["int_used"] = 12;
        sec_order["int_abstract_imported"] = 13;
        sec_order["int_for_opt_imported"] = 14;
        sec_order["opt_imported"] = 21;
        sec_order["any_section"] = 31;
    }

    # Add count to the running total of key, and update its maximum.
    function record(key, count)
    {
        stats_total[key] += count;
        if (count + 0 > stats_cmax[key]) {
            stats_cmax[key] = count + 0;
        }
    }

    {
        if (NF == 2 && $1 == "MODULE") {
            # Remembered only to give context to error messages.
            cur_module = $2;
        } else if (NF == 3) {
            section = $1;
            category = $2;
            count = $3;

            # These arrays are used as sets; only their keys matter.
            sections[section] = 0;
            sections["any_section"] = 0;
            categories[category] = 0;

            record(section "@" category, count);
            record("any_section" "@" category, count);
        } else if (! ignore_errors) {
            printf("unexpected line in module %s: <%s>\n", cur_module, $0);
        }
    }

    END {
        for (section in sections) {
            if (section in sec_order) {
                sec_num = sec_order[section];
            } else {
                sec_num = 41;
            }

            for (category in categories) {
                # Item categories sort before all other (goal) categories.
                if (substr(category, 1, 5) == "item_") {
                    cat_kind = 1;
                } else {
                    cat_kind = 2;
                }

                total = stats_total[section "@" category];
                cmax = stats_cmax[section "@" category];
                if (total > 0 || print_zero) {
                    printf("%d %d %s %s %d %d\n",
                        sec_num, cat_kind, section, category, total, cmax);
                }
            }
        }
    }
    '
}

# format_item_stats
#
# Reads the sorted output of summarize_item_stats on stdin and prints it
# as a table with the columns section, category, total and maximum.
# A header is printed before the first data row, and a blank line is printed
# whenever the section sort key or the category kind changes. Lines starting
# with the word "unexpected" (the error messages of summarize_item_stats)
# are copied through unchanged; any other line that does not have exactly
# six fields is dropped.
format_item_stats() {
    gawk '
    # Return the non-negative integer n as a string with a comma between
    # each group of three digits, e.g. 1234567 becomes 1,234,567.
    # str is a local variable, not a parameter.
    function thousands(n,    str)
    {
        n = int(n);
        str = "";
        while (n >= 1000) {
            str = sprintf(",%03d", n % 1000) str;
            n = int(n / 1000);
        }
        return sprintf("%d", n) str;
    }

    {
        if ($1 == "unexpected") {
            printf("%s\n", $0);
        } else if (NF == 6) {
            sec_num = $1;
            cat_kind = $2;
            section = $3;
            category = $4;
            count = $5;
            cmax = $6;

            if (! printed_header) {
                printf("%-22s %-22s %15s %15s\n",
                    "section", "category", "total", "maximum");
                printed_header = 1;
            }

            if (sec_num != prev_sec_num) {
                printf("\n");
                prev_sec_num = sec_num;
                prev_cat_kind = cat_kind;
            } else if (cat_kind != prev_cat_kind) {
                printf("\n");
                prev_cat_kind = cat_kind;
            }

            # We put commas between thousands, because without this,
            # it is more difficult to compare counts in different sections.
            printf("%-22s %-22s %15s %15s\n",
                section, category, thousands(count), thousands(cmax));
        }
    }
    '
}

# Summarize the statistics file, order the summary lines by section, then by
# category kind, then by decreasing total, and print them as a table.
summarize_item_stats "${ignore_errors}" "${print_zero}" < "${filename}" |
sort --key=1n,1 --key=2n,2 --key=5nr,5 |
format_item_stats
