/* REXX */
/* ========================================================================== */
/* BXC - Benford Analysis Tool (REXX Port for OS/2 Warp / ArcaOS) */
/* Original BASIC version by Jason S. Page */
/* Ported to OS/2 REXX */
/* ========================================================================== */
/* Register the RexxUtil loader and pull in its Sys* functions */
call RxFuncAdd 'SysLoadFuncs', 'RexxUtil', 'SysLoadFuncs'
call SysLoadFuncs
/* Work with 12 significant digits for the percentage arithmetic */
numeric digits 12
/* Expected Benford frequencies in percent for digits 1 through 9. */
/* benford.0 holds the number of entries in the table.             */
benford_table = "30.1 17.6 12.5 9.7 7.9 6.7 5.8 5.1 4.6"
benford.0 = words(benford_table)
do digit_no = 1 to benford.0
  benford.digit_no = word(benford_table, digit_no)
end
/* Record counters and animation settings (animation off, every 100 records) */
parse value 0 0 100 0 with toke animate animate_interval total_processed
/* Full-block glyph (code point DB hex in CP437/850) used for chart bars */
block_char = 'DB'x
/* Parse Command Line Arguments */
/* REXX delivers the whole command tail as one string; each flag value is    */
/* extracted from it by ParseFlag and is "" when the flag is absent.         */
parse arg params
load1 = ""
a12 = ""
prog = ""
col = ""
anim = ""
call ParseFlag "-f", params; load1 = result
call ParseFlag "-d", params; a12 = result
call ParseFlag "-l", params; prog = result
call ParseFlag "-c", params; col = result
call ParseFlag "-a", params; anim = result
/* Check for Help.                                                            */
/* The flag must be a whole blank-delimited word: a substring search would    */
/* also fire on arguments such as "data-hist.dat" that merely contain "-h".   */
if wordpos("-h", params) > 0 | wordpos("--help", params) > 0 then do
  say "=================================="
  say "bxc - Benford Analysis Tool (OS/2)"
  say "=================================="
  say "Usage: bxc -f [file] -d [1|all] -l [length] -c [column] -a [interval]"
  say ""
  say "Required flags:"
  say " -f [file] Data file to analyze (or URL)"
  say " -d [1|all] Analyze first digit (1) or all digits (all)"
  say " -l [number] Sample pool length (default: 10000)"
  say " -c [number] Column number (0 for single column)"
  say ""
  say "Optional flags:"
  say " -a [interval] Enable animated graph (default: 100)"
  say " -h, --help Show this help message"
  exit
end
/* Animation Flag Logic.                                                      */
/* A bare "-a" (no interval given) still enables animation and keeps the      */
/* default interval. Only a positive whole number replaces the default,       */
/* because the interval is later used as a divisor in the read loop.          */
if length(anim) > 0 | wordpos("-a", params) > 0 then do
  animate = 1
  if datatype(anim, 'W') then
    if anim > 0 then animate_interval = anim
end
/* Interactive Mode if args are missing.                                      */
/* When any of the four required values is empty, every value is prompted     */
/* for again on the console (values given on the command line are replaced).  */
if length(load1) = 0 | length(a12) = 0 | length(prog) = 0 | length(col) = 0 then do
  say "Error: Missing required flags."
  say ""
  say "Interactive mode:"
  /* List .dat files in the current directory using RexxUtil */
  call SysFileTree "*.dat", "fileList", "FO"
  do i = 1 to fileList.0
    say "Found: " || fileList.i
  end
  call charout , "Load file: "
  parse pull load1
  call charout , "[A]ll Digits or [1]st Digit: "
  parse pull a12
  call charout , "Capture Average Every (default: 10000) Points: "
  parse pull prog
  call charout , "Which Column Number (0 = single column): "
  parse pull col
  call charout , "Enable animation? [y/n]: "
  parse pull anim_choice
  if translate(left(anim_choice, 1)) = "Y" then do
    animate = 1
    call charout , "Update interval (default: 100): "
    parse pull anim_input
    /* Keep the default unless a positive whole number was entered: the      */
    /* interval is used as a divisor later, so zero must never get through.  */
    if datatype(anim_input, 'W') then
      if anim_input > 0 then animate_interval = anim_input
  end
end
/* Set Defaults */
if length(prog) = 0 then prog = "10000"
if length(col) = 0 then col = "0"
/* Validate the numeric settings up front. Without this, a non-numeric value  */
/* would silently turn the later comparisons against prog and col into string */
/* comparisons and produce meaningless segments or columns.                   */
if \datatype(prog, 'W') | prog < 1 then do
  say "Error: Sample size must be a positive whole number, got '" || prog || "'"
  exit 1
end
if \datatype(col, 'W') | col < 0 then do
  say "Error: Column must be a whole number of 0 or more, got '" || col || "'"
  exit 1
end
/* Normalise the values (drops surrounding blanks and exponent notation) */
prog = prog + 0
col = col + 0
/* If col is 0, we treat it as 1 (the whole line is column 1) */
if col = 0 then col = 1
/* Display Config */
say ""
say "Configuration:"
say " File: " || load1
say " Digits: " || a12
say " Sample size: " || prog
say " Column: " || col
if animate = 1 then
  say " Animation: Enabled (update every " || animate_interval || " records)"
else
  say " Animation: Disabled"
say ""
/* Handle Web Downloads (Requires wget in path on OS/2).                      */
/* A name starting with "ftp" or "htt" is treated as a URL and fetched with   */
/* wget; afterwards load1 is replaced by the presumed local file name.        */
if left(load1, 3) = "ftp" | left(load1, 3) = "htt" then do
  /* The URL is wrapped in double quotes for the shell, so it must not       */
  /* contain one itself.                                                      */
  if pos('"', load1) > 0 then do
    say "Error: URL must not contain double quotes."
    exit 1
  end
  say "Downloading file..."
  /* Quote the URL so characters such as "&" in a query string are not       */
  /* interpreted by the command processor.                                    */
  address CMD 'wget -c "' || load1 || '" 2>&1'
  if rc \= 0 then say "Warning: wget ended with return code " || rc
  /* The local name is assumed to be the last path segment of the URL.       */
  last_slash = lastpos('/', load1)
  if last_slash > 0 then load1 = substr(load1, last_slash + 1)
  /* A URL ending in "/" leaves no name; wget stores such pages as           */
  /* index.html.                                                              */
  if load1 == "" then load1 = "index.html"
  say "Assuming downloaded file is: " || load1
end
/* Verify File Exists.                                                        */
/* Uses the built-in STREAM query instead of SysFileExists, which does not    */
/* appear to be part of the classic OS/2 RexxUtil function set.               */
/* The checks are nested because REXX evaluates both sides of a logical OR    */
/* and an empty name must never reach STREAM.                                 */
file_found = 0
if load1 \== "" then
  if stream(load1, 'C', 'QUERY EXISTS') \== "" then file_found = 1
if \file_found then do
  say "Error: File '" || load1 || "' not found!"
  exit
end
/* Setup Output Files: one log for raw digit counts, one for percentages */
log_values = load1 || "_" || a12 || "-" || prog || "-.log"
log_percent = load1 || "_" || a12 || "-" || prog || "_.log"
/* Clear/Create logs.                                                         */
/* Positioning the write pointer at line 1 does not truncate an existing      */
/* file, so logs left by an earlier, longer run would keep their old tail     */
/* and distort the final statistics. Delete them first, then create them.     */
call SysFileDelete log_values
call SysFileDelete log_percent
call lineout log_values, , 1
call lineout log_percent, , 1
/* Initialize Counters */
/* c.1 to c.9 hold the digit counts of the current segment */
c. = 0
count_total = 0 /* Digits counted in the current segment */
say ""
say "========================================================================"
say "Benford X-C Forensics Digital Analysis Tool (OS/2 REXX Port)"
say "========================================================================"
say ""
if animate = 0 then do
  say " Record, 1, 2, 3, 4, 5, 6, 7, 8, 9"
end
/* Open Input File */
infile = load1
/* Read loop: one record per input line.                                      */
/* For each record the selected column is reduced to its digits, the digits   */
/* are tallied in c.1 to c.9, and whenever the tally reaches the sample size  */
/* (prog) one line is appended to each log and the tally starts over.         */
do while lines(infile) > 0
  line = linein(infile)
  toke = toke + 1
  total_processed = total_processed + 1
  /* Skip empty lines */
  if length(line) = 0 then iterate
  /* Extract the requested comma-separated column (column 1 = whole line) */
  ot = line
  if col > 1 then do
    temp_line = line
    found_val = ""
    do loop_col = 1 to col
      parse var temp_line this_val "," temp_line
      if loop_col = col then found_val = this_val
    end
    ot = found_val
  end
  /* Keep only the characters 0-9 (signs, decimal points, text are dropped) */
  clean_ot = ""
  do i = 1 to length(ot)
    char = substr(ot, i, 1)
    if datatype(char, 'N') then clean_ot = clean_ot || char
  end
  ot = clean_ot
  if length(ot) = 0 then iterate
  /* Benford's law concerns the first significant digit, so leading zeros    */
  /* (as in "0.0123") are skipped before counting. ot itself is left as is   */
  /* because it is written to the logs below.                                */
  sig_digits = strip(ot, 'L', '0')
  /* Determine how many digits to analyze: all of them, or only the first */
  if translate(left(a12, 1)) = "A" then position = length(sig_digits)
  else position = min(1, length(sig_digits))
  /* Count digits 1-9; zeros inside the number are not tallied */
  do i = 1 to position
    digit = substr(sig_digits, i, 1)
    if digit >= 1 & digit <= 9 then do
      count_total = count_total + 1
      c.digit = c.digit + 1
    end
  end
  /* Animation: redraw the live chart every animate_interval records */
  if animate = 1 & (total_processed // animate_interval = 0) & count_total > 0 then do
    call DrawAnimatedChart count_total, total_processed, prog
  end
  /* Segment Reporting: runs only once the segment holds prog digits */
  if count_total >= prog then do
    /* Calculate Percentages */
    p. = 0
    do i = 1 to 9
      if count_total > 0 then do
        /* No fixed integer width here: FORMAT raises an error when the      */
        /* integer part does not fit, which happens at exactly 100 percent.  */
        /* Shorter values are padded to the usual five characters.           */
        pct = format((c.i * 100) / count_total, , 2)
        if length(pct) < 5 then pct = right(pct, 5)
        p.i = pct
      end
      else p.i = 0
    end
    /* Format Output Strings: value, record number, then nine columns */
    cc1 = ot || "," || toke || "," || c.1 || "," || c.2 || "," || c.3 || "," || c.4 || "," || c.5 || "," || c.6 || "," || c.7 || "," || c.8 || "," || c.9
    cc2 = ot || "," || toke || "," || p.1 || "," || p.2 || "," || p.3 || "," || p.4 || "," || p.5 || "," || p.6 || "," || p.7 || "," || p.8 || "," || p.9
    /* Display if not animating */
    if animate = 0 then do
      say "#: " || cc1
      say "%: " || cc2
    end
    /* Write to logs */
    call lineout log_values, cc1
    call lineout log_percent, cc2
    /* Reset Segment Counters */
    count_total = 0
    do i = 1 to 9; c.i = 0; end
  end
end
/* Release the input stream, then close both log files */
call stream infile, 'C', 'CLOSE'
call lineout log_values
call lineout log_percent
/* Summary banner (one extra blank line when the animated view was used) */
banner_rule = "========================================================================"
if animate = 1 then say ""
say ""
say banner_rule
say "Analysis Complete"
say banner_rule
records_line = "Total records processed: " || toke
say records_line
say ""
say "Calculating final statistics and chi-squared test..."
/* Build the averaged statistics and the chart file from the percent log */
call GenerateFinalReport log_percent, load1, a12, prog
say ""
say "Output files:"
values_line = " Values log: " || log_values
percent_line = " Percent log: " || log_percent
say values_line
say percent_line
say ""
exit
/* ========================================================================== */
/* Subroutines */
/* ========================================================================== */
/* ParseFlag: return the value that follows a flag in the command line.       */
/*   arg 1  flag     the flag to look for, e.g. "-f"                          */
/*   arg 2  cmdline  the complete argument string                             */
/* Returns the text after the flag up to the next " -" (or the end of the     */
/* string), stripped of surrounding blanks; "" when the flag is absent.       */
ParseFlag: procedure
  parse arg flag, cmdline
  result = ""
  /* The flag must be a whole blank-delimited word. A plain substring search */
  /* would also hit text inside a value, e.g. "-c" inside "my-counts.dat".   */
  flag_word = wordpos(flag, cmdline)
  if flag_word > 0 then do
    /* Skip the flag itself and the blank that follows it */
    start = wordindex(cmdline, flag_word) + length(flag) + 1
    rest = substr(cmdline, start)
    /* The value ends where the next flag (" -") begins */
    parse var rest val " -" .
    result = strip(val)
  end
  return result
/* DrawAnimatedChart: clear the screen and print the live digit table.        */
/*   arg 1  digits counted in the current segment                             */
/*   arg 2  records processed so far                                          */
/*   arg 3  configured sample pool size                                       */
/* Reads the shared counters c. and the expected values benford.              */
DrawAnimatedChart: procedure expose c. benford. block_char
  parse arg seg_count, rec_count, pool_size
  /* Clear Screen - OS/2 Specific */
  call SysCls
  say "========================================================================"
  say "Benford X-C Live Analysis - Animated View"
  say "========================================================================"
  say "Processing: " || rec_count || " records | Sample pool: " || pool_size || " | Current: " || seg_count
  say "------------------------------------------------------------------------"
  say ""
  say "Digit Actual Expected Deviation Chart"
  say "----- ------- -------- --------- ---------------------------------"
  /* One row per digit; the share stays 0 while nothing has been counted */
  do d = 1 to 9
    share = 0
    if seg_count > 0 then share = (c.d * 100) / seg_count
    call PrintDigitRow d, share, benford.d
  end
  say "========================================================================"
  return
/* PrintDigitRow: print one table row of the animated chart.                  */
/*   arg 1  digit     the digit the row describes                             */
/*   arg 2  actual    observed share in percent                               */
/*   arg 3  expected  expected share in percent                               */
/* The bar is one block character per two percentage points, capped at 50.    */
PrintDigitRow: procedure expose block_char
  parse arg digit, actual, expected
  deviation = actual - expected
  bar_len = actual / 2
  if bar_len > 50 then bar_len = 50
  bar = copies(block_char, trunc(bar_len))
  /* Format with one decimal place. FORMAT raises an error when a fixed      */
  /* integer width is too small (100 percent, or a deviation of -10 or       */
  /* lower), so the width is left open and shorter results are padded to     */
  /* four characters.                                                         */
  s_actual = format(actual, , 1)
  if length(s_actual) < 4 then s_actual = right(s_actual, 4)
  s_actual = s_actual || "%"
  s_expect = format(expected, , 1)
  if length(s_expect) < 4 then s_expect = right(s_expect, 4)
  s_expect = s_expect || "%"
  s_dev = format(deviation, , 1)
  if length(s_dev) < 4 then s_dev = right(s_dev, 4)
  /* Non-negative deviations get an explicit plus sign */
  sign = " "
  if deviation >= 0 then sign = "+"
  s_dev = sign || s_dev || "%"
  say " " || digit || " " || left(s_actual, 7) || " " || left(s_expect, 8) || " " || left(s_dev, 9) || " " || bar
  return
/* GenerateFinalReport: average the logged percentages, print the final       */
/* table with a chi-squared verdict, and write a text chart file.             */
/*   arg 1  percent_file  percent log (value, record, then p1 to p9 per line) */
/*   arg 2  data_file     input file name (not used here)                     */
/*   arg 3  mode          digit mode (not used here)                          */
/*   arg 4  samp          sample size (not used here)                         */
/* NOTE(review): the statistic below is computed on percentages and is not    */
/* scaled by the number of observations, yet it is compared with 15.51 (the   */
/* figure the original uses for its 95% verdict). Confirm whether it should   */
/* be scaled by the sample size before relying on the verdict.                */
GenerateFinalReport: procedure expose benford. block_char
  parse arg percent_file, data_file, mode, samp
  sum. = 0
  count = 0
  /* Read the log that was just written, unless it is empty */
  if stream(percent_file, 'C', 'QUERY SIZE') \= 0 then do
    /* Reset the stream so reading starts at the first line */
    call stream percent_file, 'C', 'CLOSE'
    do while lines(percent_file) > 0
      line = linein(percent_file)
      if length(line) = 0 then iterate
      parse var line toss "," recd "," p.1 "," p.2 "," p.3 "," p.4 "," p.5 "," p.6 "," p.7 "," p.8 "," p.9
      /* Ignore lines whose nine percentage fields are not all numeric;     */
      /* adding them up would otherwise stop with an arithmetic error.      */
      usable = 1
      do i = 1 to 9
        if \datatype(p.i, 'N') then usable = 0
      end
      if \usable then iterate
      do i = 1 to 9
        sum.i = sum.i + p.i
      end
      count = count + 1
    end
    call stream percent_file, 'C', 'CLOSE'
  end
  if count = 0 then do
    say "No complete samples were logged; nothing to report."
    return
  end
  chi_squared = 0
  say "Final Statistics (Averaged across all samples):"
  say "------------------------------------------------"
  say "Digit Actual Expected Deviation"
  say "----- ------- -------- ---------"
  do i = 1 to 9
    avg = sum.i / count
    dev = avg - benford.i
    chi_part = (dev * dev) / benford.i
    chi_squared = chi_squared + chi_part
    /* One decimal place with an open integer width: a fixed width of two   */
    /* makes FORMAT fail for 100 percent or a deviation of -10 or lower.    */
    /* Shorter results are padded to four characters.                       */
    s_avg = format(avg, , 1)
    if length(s_avg) < 4 then s_avg = right(s_avg, 4)
    s_avg = s_avg || "%"
    s_exp = format(benford.i, , 1)
    if length(s_exp) < 4 then s_exp = right(s_exp, 4)
    s_exp = s_exp || "%"
    s_dev = format(dev, , 1)
    if length(s_dev) < 4 then s_dev = right(s_dev, 4)
    s_dev = s_dev || "%"
    say " " || i || " " || left(s_avg, 7) || " " || left(s_exp, 8) || " " || s_dev
  end
  say ""
  say "Chi-Squared Statistic: " || format(chi_squared, , 4)
  say ""
  if chi_squared < 15.51 then do
    say "Result: Data FITS Benford's Law (95% confidence)"
    say " No significant deviation detected."
  end
  else do
    say "Result: Data DOES NOT fit Benford's Law (95% confidence)"
    say " Significant deviation detected - possible fraud indicator!"
  end
  /* Generate ASCII Chart File */
  say ""
  say "Generating ASCII chart..."
  /* Put "chart_" in front of the file name only, so a log given with a     */
  /* drive or directory still yields a valid path in the same location.     */
  chart_file = filespec("drive", percent_file) || filespec("path", percent_file) || "chart_" || filespec("name", percent_file) || ".txt"
  /* Delete first: repositioning the write pointer does not truncate, so a  */
  /* longer chart from an earlier run would leave stale lines at the end.   */
  call SysFileDelete chart_file
  call lineout chart_file, , 1
  /* Re-read percent file for charting, one chart section per logged sample */
  call stream percent_file, 'C', 'CLOSE'
  sample_count = 0
  do while lines(percent_file) > 0
    line = linein(percent_file)
    if length(line) = 0 then iterate
    parse var line toss "," recd "," p.1 "," p.2 "," p.3 "," p.4 "," p.5 "," p.6 "," p.7 "," p.8 "," p.9
    /* Same filter as above so the chart matches the averaged samples */
    usable = 1
    do i = 1 to 9
      if \datatype(p.i, 'N') then usable = 0
    end
    if \usable then iterate
    sample_count = sample_count + 1
    call lineout chart_file, "========================================================================"
    call lineout chart_file, "Sample #" || sample_count || " | Record: " || recd || " | Value: " || toss
    call lineout chart_file, "------------------------------------------------------------------------"
    do i = 1 to 9
      call WriteChartLine chart_file, i, p.i
    end
    call lineout chart_file, ""
  end
  call stream percent_file, 'C', 'CLOSE'
  call stream chart_file, 'C', 'CLOSE'
  say "ASCII chart saved to: " || chart_file
  say ""
  say "Preview (first sample):"
  call DisplayHead chart_file, 15
  say ""
  say "Preview (last sample):"
  call DisplayTail chart_file, 12
  return
/* WriteChartLine: append one digit row to the chart file.                    */
/*   arg 1  file_name   chart file to append to                               */
/*   arg 2  digit       digit the row describes; also used to draw the bar    */
/*   arg 3  percentage  share of that digit in percent                        */
/* The bar repeats the digit once per whole percentage point, capped at 100.  */
WriteChartLine: procedure expose block_char
  parse arg file_name, digit, percentage
  bar_len = percentage
  if bar_len > 100 then bar_len = 100
  bar = copies(digit, trunc(bar_len))
  /* One decimal place with an open integer width: a fixed width of two      */
  /* makes FORMAT fail at 100 percent. Shorter results are padded to four    */
  /* characters.                                                              */
  s_pct = format(percentage, , 1)
  if length(s_pct) < 4 then s_pct = right(s_pct, 4)
  line = " " || digit || " | " || s_pct || "% | " || bar
  call lineout file_name, line
  return
/* DisplayHead: print the first lines of a file to the console.               */
/*   arg 1  file to show                                                      */
/*   arg 2  maximum number of lines to print                                  */
/* The stream is closed before and after so reading starts at line 1.         */
DisplayHead: procedure
  parse arg file, max_lines
  call stream file, 'C', 'CLOSE'
  shown = 0
  do forever
    /* Stop at end of file or once the requested number has been printed */
    if \(lines(file) > 0) | \(shown < max_lines) then leave
    say linein(file)
    shown = shown + 1
  end
  call stream file, 'C', 'CLOSE'
  return
/* DisplayTail: print the last lines of a file to the console.                */
/*   arg 1  file       file to show                                           */
/*   arg 2  num_lines  number of trailing lines to print                      */
/* The whole file is read into a stem with LINEIN and the tail of the stem    */
/* is printed. The standard stream functions are used because SysFileRead     */
/* is not provided by RexxUtil.                                               */
DisplayTail: procedure
  parse arg file, num_lines
  fileContent. = ""
  fileContent.0 = 0
  /* Close first so reading starts at line 1 */
  call stream file, 'C', 'CLOSE'
  do n = 1 while lines(file) > 0
    fileContent.n = linein(file)
    fileContent.0 = n
  end
  call stream file, 'C', 'CLOSE'
  start_line = fileContent.0 - num_lines + 1
  if start_line < 1 then start_line = 1
  do i = start_line to fileContent.0
    say fileContent.i
  end
  return