Questioning Everything Propaganda

Home Tags
Login RSS
BXC Benford Fraud Analysis
You are viewing an old revision from 2025-11-24 04:13.
View the current, live version.

/* REXX */
/* ========================================================================== */
/* BXC - Benford Analysis Tool (REXX Port for OS/2 Warp / ArcaOS)             */
/* Original BASIC version by Jason S. Page                                    */
/* Ported to OS/2 REXX                                                        */
/* ========================================================================== */

/* NOTE: every comment in this header must be closed. REXX comments nest, so  */
/* a single missing terminator turns the rest of the file into one comment.   */

/* Load RexxUtil Library for System Functions (SysCls, SysFileTree, ...) */
call RxFuncAdd 'SysLoadFuncs', 'RexxUtil', 'SysLoadFuncs'
call SysLoadFuncs

/* Initialize Variables */
numeric digits 12 /* Ensure precision for calculations */

/* Expected first-digit frequencies (percent) for digits 1..9. */
/* benford.0 holds the number of entries; the nine values sum to 100.0. */
benford.0 = 9
benford.1 = 30.1
benford.2 = 17.6
benford.3 = 12.5
benford.4 = 9.7
benford.5 = 7.9
benford.6 = 6.7
benford.7 = 5.8
benford.8 = 5.1
benford.9 = 4.6

toke = 0               /* number of input lines read so far */
animate = 0            /* 1 = redraw the live chart while reading */
animate_interval = 100 /* redraw every N input lines */
total_processed = 0    /* incremented together with toke in the read loop */

/* Default Block Character for OS/2 (CP437/850 Full Block) */
block_char = 'DB'x

/* Parse Command Line Arguments */
/* REXX hands the whole command tail over as a single string. */
parse arg params

/* Pull each flag's value out of the command tail; "" means "not given". */
load1 = ParseFlag("-f", params)   /* data file or URL */
a12   = ParseFlag("-d", params)   /* "1" or "all" */
prog  = ParseFlag("-l", params)   /* sample pool length */
col   = ParseFlag("-c", params)   /* column number */
anim  = ParseFlag("-a", params)   /* animation interval (optional value) */

/* Check for Help */
/* wordpos matches whole blank-delimited words only, so a file name or URL */
/* that merely contains "-h" (e.g. http://my-host/x.dat) no longer triggers it. */
if wordpos("-h", params) > 0 | wordpos("--help", params) > 0 then do
    say "=================================="
    say "bxc - Benford Analysis Tool (OS/2)"
    say "=================================="
    say "Usage: bxc -f [file] -d [1|all] -l [length] -c [column] -a [interval]"
    say ""
    say "Required flags:"
    say "  -f [file]      Data file to analyze (or URL)"
    say "  -d [1|all]     Analyze first digit (1) or all digits (all)"
    say "  -l [number]    Sample pool length (default: 10000)"
    say "  -c [number]    Column number (0 for single column)"
    say ""
    say "Optional flags:"
    say "  -a [interval]  Enable animated graph (default: 100)"
    say "  -h, --help     Show this help message"
    exit
end

/* Animation Flag Logic */
/* A bare "-a" (no interval) enables animation with the default interval. */
if length(anim) > 0 | wordpos("-a", params) > 0 then do
    animate = 1
    /* Only accept a positive whole number: the interval is later used as a */
    /* divisor (total_processed // animate_interval), so 0 must be rejected. */
    if datatype(anim, 'W') then
        if anim > 0 then animate_interval = anim
end

/* Interactive Mode if args are missing */
/* If any of the four required values is absent, all of them are prompted for. */
if length(load1) = 0 | length(a12) = 0 | length(prog) = 0 | length(col) = 0 then do
    say "Error: Missing required flags."
    say ""
    say "Interactive mode:"
    /* List .dat files using RexxUtil */
    fileList.0 = 0 /* keeps the loop below safe if SysFileTree fails */
    call SysFileTree "*.dat", "fileList", "FO"
    do i = 1 to fileList.0
        say "Found: " || fileList.i
    end

    call charout , "Load file: "
    parse pull load1
    load1 = strip(load1)

    call charout , "[A]ll Digits or [1]st Digit: "
    parse pull a12
    a12 = strip(a12)

    call charout , "Capture Average Every (default: 10000) Points: "
    parse pull prog
    prog = strip(prog)

    call charout , "Which Column Number (0 = single column): "
    parse pull col
    col = strip(col)

    call charout , "Enable animation? [y/n]: "
    parse pull anim_choice

    if translate(left(strip(anim_choice), 1)) = "Y" then do
        animate = 1
        call charout , "Update interval (default: 100): "
        parse pull anim_input
        anim_input = strip(anim_input)
        /* The interval is used as a divisor later on, so 0 is not acceptable. */
        if datatype(anim_input, 'W') then
            if anim_input > 0 then animate_interval = anim_input
    end
end

/* Set Defaults */
if length(prog) = 0 then prog = "10000"
if length(col) = 0 then col = "0"

/* Reject values the read loop cannot use. Comparing a non-numeric string */
/* with a number silently falls back to a character comparison in REXX. */
if \datatype(prog, 'W') | prog < 1 then do
    say "Warning: invalid sample size '" || prog || "', using 10000."
    prog = "10000"
end
if \datatype(col, 'W') | col < 0 then do
    say "Warning: invalid column '" || col || "', using 0."
    col = "0"
end

/* If col is 0, we treat it as 1 (the whole line is column 1) */
/* NOTE(review): this makes "-c 1" on a multi-column file analyse the whole */
/* line rather than the first field - confirm whether that is intended. */
if col = "0" then col = "1"

/* Display Config */
say ""
say "Configuration:"
say "  File: " || load1
say "  Digits: " || a12
say "  Sample size: " || prog
say "  Column: " || col
if animate = 1 then
    say "  Animation: Enabled (update every " || animate_interval || " records)"
else
    say "  Animation: Disabled"
say ""

/* Handle Web Downloads (Requires wget in path on OS/2) */
if left(load1, 3) = "ftp" | left(load1, 3) = "htt" then do
    /* The URL is handed to the command shell, so it is wrapped in double */
    /* quotes; a URL that itself contains a quote cannot be passed safely. */
    if pos('"', load1) > 0 then do
        say "Error: URL must not contain double-quote characters."
        exit 1
    end

    say "Downloading file..."
    /* Using address CMD to call wget */
    address CMD 'wget -c "' || load1 || '" 2>&1'
    if rc \= 0 then say "Warning: wget ended with return code " || rc

    /* The BASIC original searched for the newest file after the download. */
    /* This port assumes the local name is whatever follows the URL's last slash. */
    last_slash = lastpos('/', load1)
    if last_slash > 0 then load1 = substr(load1, last_slash + 1)

    say "Assuming downloaded file is: " || load1
end

/* Verify File Exists */
/* STREAM's QUERY EXISTS returns a null string when the file is absent. It is */
/* used here because it is a built-in and does not depend on which RexxUtil */
/* level is installed. */
file_found = 0
if length(load1) > 0 then
    if stream(load1, 'C', 'QUERY EXISTS') \== "" then file_found = 1

if \file_found then do
    say "Error: File '" || load1 || "' not found!"
    exit 1
end

/* Setup Output Files */
/* Two logs are written per run: raw digit counts and percentages. */
log_values = load1 || "_" || a12 || "-" || prog || "-.log"
log_percent = load1 || "_" || a12 || "-" || prog || "_.log"

/* Remove logs left by an earlier run. Merely positioning the write pointer */
/* at line 1 does not truncate the file, so old lines beyond the new data */
/* would survive and be averaged into the final report. A missing file is */
/* fine here: SysFileDelete just reports a non-zero return code. */
call SysFileDelete log_values
call SysFileDelete log_percent

/* Initialize Counters */
/* c.1 .. c.9 hold the per-digit counts of the current segment. */
c. = 0
count_total = 0 /* Segment counter */

say ""
say "========================================================================"
say "Benford X-C Forensics Digital Analysis Tool (OS/2 REXX Port)"
say "========================================================================"
say ""

/* The column header is only useful when segment rows are printed. */
if animate = 0 then do
    say "    Record,  1,  2,  3,  4,  5,  6,  7,  8,  9"
end

/* Open Input File */
infile = load1

/* Read loop: one input line per pass. Digits are tallied into c.1..c.9; */
/* whenever count_total reaches prog a segment row is written to both logs */
/* and the tallies are reset. */
do while lines(infile) > 0
    line = linein(infile)
    toke = toke + 1
    total_processed = total_processed + 1

    /* Skip empty lines */
    if length(line) = 0 then iterate

    /* Extract the requested comma-separated field (fields are numbered */
    /* from 1). With col = 1 the whole line is used unchanged. */
    ot = line
    if col > 1 then do
        temp_line = line
        found_val = ""
        /* A counted loop raises an error for a non-numeric col; the former */
        /* "do while loop_col <= col" form would spin forever on one. */
        do loop_col = 1 to col
            parse var temp_line this_val "," temp_line
            if loop_col = col then found_val = this_val
        end
        ot = found_val
    end

    /* Keep only the characters 0-9 */
    clean_ot = ""
    do i = 1 to length(ot)
        char = substr(ot, i, 1)
        if datatype(char, 'N') then clean_ot = clean_ot || char
    end
    ot = clean_ot

    if length(ot) = 0 then iterate

    /* Drop leading zeros so that the first character examined is the first */
    /* significant digit (0.0123 -> 123). Zeros are never counted anyway, so */
    /* this does not change the all-digits tallies. ot is kept for the log. */
    sig = strip(ot, 'L', '0')
    if length(sig) = 0 then iterate

    /* Determine digits to analyze: every digit ("A...") or only the first */
    if translate(left(a12, 1)) = "A" then position = length(sig)
    else position = 1

    /* Count digits */
    do i = 1 to position
        digit = substr(sig, i, 1)
        /* Only 1-9 are tallied; zeros are ignored */
        if digit >= 1 & digit <= 9 then do
            count_total = count_total + 1
            c.digit = c.digit + 1
        end
    end

    /* Animation */
    /* Nested tests because REXX evaluates both sides of "&": the remainder */
    /* must not be computed with a zero interval. */
    if animate = 1 & count_total > 0 then
        if animate_interval > 0 then
            if total_processed // animate_interval = 0 then
                call DrawAnimatedChart count_total, total_processed, prog

    /* Segment Reporting */
    if count_total >= prog then do
        /* Calculate Percentages */
        p. = 0
        do i = 1 to 9
            if count_total > 0 then do
                /* Let FORMAT choose the integer width, then pad to the usual */
                /* five characters; a fixed width of 2 fails when the value */
                /* rounds to 100.00. */
                pct = format((c.i * 100) / count_total, , 2)
                p.i = right(pct, max(5, length(pct)))
            end
            else p.i = 0
        end

        /* Format Output Strings: last value, line number, then nine fields */
        cc1 = ot || "," || toke || "," || c.1 || "," || c.2 || "," || c.3 || "," || c.4 || "," || c.5 || "," || c.6 || "," || c.7 || "," || c.8 || "," || c.9
        cc2 = ot || "," || toke || "," || p.1 || "," || p.2 || "," || p.3 || "," || p.4 || "," || p.5 || "," || p.6 || "," || p.7 || "," || p.8 || "," || p.9

        /* Display if not animating */
        if animate = 0 then do
            say "#: " || cc1
            say "%: " || cc2
        end

        /* Write to logs */
        call lineout log_values, cc1
        call lineout log_percent, cc2

        /* Reset Segment Counters */
        count_total = 0
        do i = 1 to 9; c.i = 0; end
    end
end

call stream infile, 'C', 'CLOSE'
call lineout log_values /* Close file */
call lineout log_percent /* Close file */

/* Separate the closing summary from the last animation frame. */
if animate = 1 then
    say ""

/* Closing banner with the number of input lines read. */
say ""
say "========================================================================"
say "Analysis Complete"
say "========================================================================"
say "Total records processed:" toke
say ""

/* Summarise the percentage log: averages, chi-squared, ASCII chart. */
say "Calculating final statistics and chi-squared test..."
call GenerateFinalReport log_percent, load1, a12, prog

/* Tell the user where the two logs were written. */
say ""
say "Output files:"
say "  Values log:" log_values
say "  Percent log:" log_percent
say ""

exit

/* ========================================================================== */
/* Subroutines                                                                */
/* ========================================================================== */

/* ParseFlag: return the value that follows a flag in the command tail.       */
/*   flag    - the flag text, e.g. "-f"                                       */
/*   cmdline - the complete argument string                                   */
/* Returns the text between the flag and the next " -" (or the end of the     */
/* string) with surrounding blanks removed, or "" when the flag is absent or  */
/* has no value. Values may therefore contain blanks, but not " -".           */
ParseFlag: procedure
    parse arg flag, cmdline

    /* Pad with blanks so the flag can be matched as a whole word; a plain */
    /* substring search would find "-d" inside a value such as "my-data.dat". */
    padded = " " || cmdline || " "
    ppos = pos(" " || flag || " ", padded)
    if ppos = 0 then return ""

    /* rest begins with the blank that follows the flag, so a flag that is */
    /* immediately followed by another flag yields an empty value. */
    rest = substr(padded, ppos + length(flag) + 1)
    parse var rest val " -" .
    return strip(val)

/* DrawAnimatedChart: clear the screen and print one live frame.              */
/*   arg 1 - digits counted in the current segment                            */
/*   arg 2 - input lines processed so far                                     */
/*   arg 3 - configured sample pool size                                      */
/* Reads the per-digit counts from c. and the expected values from benford.   */
DrawAnimatedChart: procedure expose c. benford. block_char
    parse arg seg_count, rec_count, pool_size

    /* Clear Screen - OS/2 Specific */
    call SysCls

    say "========================================================================"
    say "Benford X-C Live Analysis - Animated View"
    say "========================================================================"
    say "Processing:" rec_count "records | Sample pool:" pool_size "| Current:" seg_count
    say "------------------------------------------------------------------------"
    say ""
    say "Digit  Actual   Expected  Deviation  Chart"
    say "-----  -------  --------  ---------  ---------------------------------"

    /* One row per digit; the share stays 0 while nothing has been counted. */
    do d = 1 to 9
        share = 0
        if seg_count > 0 then share = (c.d * 100) / seg_count
        call PrintDigitRow d, share, benford.d
    end

    say "========================================================================"
    return

/* PrintDigitRow: print one row of the live chart.                            */
/*   digit    - the digit (1-9) the row describes                             */
/*   actual   - observed share in percent                                     */
/*   expected - expected share in percent                                     */
/* The bar is one block character per two percentage points, capped at 50.    */
PrintDigitRow: procedure expose block_char
    parse arg digit, actual, expected

    deviation = actual - expected
    bar_len = actual / 2
    if bar_len > 50 then bar_len = 50
    if bar_len < 0 then bar_len = 0 /* copies() rejects a negative count */

    bar = copies(block_char, trunc(bar_len))

    /* Format numbers nicely. FORMAT is called without a fixed integer width */
    /* because a width of 2 raises an error for 100.0 and for any deviation */
    /* of -10.0 or lower (the minus sign counts toward the width). Values are */
    /* then right-aligned to the four characters the fixed width used to give. */
    s_actual = format(actual, , 1)
    s_actual = right(s_actual, max(4, length(s_actual))) || "%"
    s_expect = format(expected, , 1)
    s_expect = right(s_expect, max(4, length(s_expect))) || "%"

    sign = " "
    if deviation >= 0 then sign = "+"
    s_dev = format(deviation, , 1)
    s_dev = sign || right(s_dev, max(4, length(s_dev))) || "%"

    say "  " || digit || "    " || left(s_actual, 7) || "  " || left(s_expect, 8) || "  " || left(s_dev, 9) || "  " || bar
    return

/* GenerateFinalReport: summarise the percentage log.                         */
/*   percent_file - log whose lines are "value,record,p1,...,p9"              */
/*   data_file, mode, samp - accepted for the caller's benefit; not used here */
/* Prints the per-digit average across all logged samples, a chi-squared      */
/* style statistic against benford., writes an ASCII chart file next to the   */
/* log and previews its first and last sample.                                */
GenerateFinalReport: procedure expose benford. block_char
    parse arg percent_file, data_file, mode, samp

    sum. = 0
    count = 0

    /* Nothing to do when the log is missing or empty */
    log_size = stream(percent_file, 'C', 'QUERY SIZE')
    if log_size == "" | log_size = 0 then do
        say "No complete samples were logged; nothing to report."
        return
    end

    /* Make sure reading starts at the top of the file we just wrote */
    call stream percent_file, 'C', 'CLOSE'

    /* Single pass: accumulate the sums and keep each sample for the chart */
    do while lines(percent_file) > 0
        line = linein(percent_file)
        if length(line) = 0 then iterate

        /* Format: val, index, p1, p2... p9 */
        parse var line toss "," recd "," p.1 "," p.2 "," p.3 "," p.4 "," p.5 "," p.6 "," p.7 "," p.8 "," p.9

        /* Skip lines whose nine fields are not all numbers; adding one of */
        /* them to a sum would raise an arithmetic error. */
        usable = 1
        do i = 1 to 9
            if \datatype(p.i, 'N') then usable = 0
        end
        if \usable then iterate

        count = count + 1
        rec.count = recd
        val.count = toss
        do i = 1 to 9
            sum.i = sum.i + p.i
            pct.count.i = p.i
        end
    end
    call stream percent_file, 'C', 'CLOSE'

    if count = 0 then do
        say "No usable samples found in " || percent_file
        return
    end

    chi_squared = 0

    say "Final Statistics (Averaged across all samples):"
    say "------------------------------------------------"
    say "Digit  Actual   Expected  Deviation"
    say "-----  -------  --------  ---------"

    do i = 1 to 9
        avg = sum.i / count
        dev = avg - benford.i
        chi_part = (dev * dev) / benford.i
        chi_squared = chi_squared + chi_part

        /* No fixed integer width: FORMAT(x, 2, 1) raises an error for 100.0 */
        /* and for deviations of -10.0 or lower. Pad to four characters to */
        /* keep the columns where they were. */
        s_avg = format(avg, , 1)
        s_avg = right(s_avg, max(4, length(s_avg))) || "%"
        s_exp = format(benford.i, , 1)
        s_exp = right(s_exp, max(4, length(s_exp))) || "%"
        s_dev = format(dev, , 1)
        s_dev = right(s_dev, max(4, length(s_dev))) || "%"

        say "  " || i || "    " || left(s_avg, 7) || "  " || left(s_exp, 8) || "  " || s_dev
    end

    say ""
    say "Chi-Squared Statistic: " || format(chi_squared, , 4)
    say ""

    /* NOTE(review): the statistic above is built from percentages, not from */
    /* digit counts, so it equals a count-based chi-squared only when exactly */
    /* 100 digits were observed. 15.51 is the usual 5% critical value for 8 */
    /* degrees of freedom - confirm whether the sum should be scaled by */
    /* (digits observed / 100) before this comparison. */
    if chi_squared < 15.51 then do
        say "Result: Data FITS Benford's Law (95% confidence)"
        say "        No significant deviation detected."
    end
    else do
        say "Result: Data DOES NOT fit Benford's Law (95% confidence)"
        say "        Significant deviation detected - possible fraud indicator!"
    end

    /* Generate ASCII Chart File */
    say ""
    say "Generating ASCII chart..."

    /* Put "chart_" in front of the file NAME only, so a log given with a */
    /* drive or directory still produces a valid path. Without a path this */
    /* is the same name as before. */
    base_name = filespec("name", percent_file)
    dir_part = left(percent_file, length(percent_file) - length(base_name))
    chart_file = dir_part || "chart_" || base_name || ".txt"

    /* Start from an empty chart; positioning at line 1 would not truncate */
    call stream chart_file, 'C', 'CLOSE'
    call SysFileDelete chart_file

    do k = 1 to count
        call lineout chart_file, "========================================================================"
        call lineout chart_file, "Sample #" || k || " | Record: " || rec.k || " | Value: " || val.k
        call lineout chart_file, "------------------------------------------------------------------------"

        do i = 1 to 9
            call WriteChartLine chart_file, i, pct.k.i
        end
        call lineout chart_file, ""
    end
    call stream chart_file, 'C', 'CLOSE'

    say "ASCII chart saved to: " || chart_file
    say ""
    say "Preview (first sample):"
    /* Using REXX to head/tail instead of shell */
    call DisplayHead chart_file, 15
    say ""
    say "Preview (last sample):"
    call DisplayTail chart_file, 12
    return

/* WriteChartLine: append one bar row to the chart file.                      */
/*   file_name  - chart file to append to                                     */
/*   digit      - the digit (1-9); also the character the bar is drawn with   */
/*   percentage - share in percent; one bar character per whole point,        */
/*                capped at 100                                               */
WriteChartLine: procedure expose block_char
    parse arg file_name, digit, percentage

    bar_len = percentage
    if bar_len > 100 then bar_len = 100
    if bar_len < 0 then bar_len = 0 /* copies() rejects a negative count */

    /* BASIC used: bar$ = bar$ + str$(digit) */
    bar = copies(digit, trunc(bar_len))

    /* No fixed integer width: FORMAT(x, 2, 1) raises an error once the value */
    /* rounds to 100.0. Pad to four characters so shorter values line up as */
    /* they did before. */
    s_pct = format(percentage, , 1)
    s_pct = right(s_pct, max(4, length(s_pct)))

    line = "  " || digit || " | " || s_pct || "% | " || bar
    call lineout file_name, line
    return

/* DisplayHead: print the first lines of a file to the screen.                */
/*   arg 1 - file to read                                                     */
/*   arg 2 - maximum number of lines to show                                  */
DisplayHead: procedure
    parse arg src, limit

    /* Closing first rewinds the stream if it was already open. */
    call stream src, 'C', 'CLOSE'

    shown = 0
    do forever
        /* Stop at end of file or once the limit has been reached. */
        if \(lines(src) > 0 & shown < limit) then leave
        say linein(src)
        shown = shown + 1
    end

    call stream src, 'C', 'CLOSE'
    return

/* DisplayTail: print the last num_lines lines of a file to the screen.       */
/*   file      - file to read                                                 */
/*   num_lines - how many trailing lines to show (positive whole number)      */
/* The file is read once with the built-in stream functions; only the most    */
/* recent num_lines lines are kept, in a ring of stem entries.                */
DisplayTail: procedure
    parse arg file, num_lines

    /* Nothing sensible to show for a missing, zero or non-numeric count */
    if \datatype(num_lines, 'W') then return
    if num_lines < 1 then return

    /* Closing first rewinds the stream if it was already open */
    call stream file, 'C', 'CLOSE'

    total = 0
    do while lines(file) > 0
        total = total + 1
        slot = total // num_lines /* ring index 0 .. num_lines-1 */
        ring.slot = linein(file)
    end
    call stream file, 'C', 'CLOSE'

    start_line = total - num_lines + 1
    if start_line < 1 then start_line = 1

    do i = start_line to total
        slot = i // num_lines
        say ring.slot
    end
    return

Original Author: admin

Views: 66 (Unique: 56)

Page ID ( Copy Link): page_6923db566fd807.22535029-4870ac84de8bbc87

Page History (3 revisions):