#!/usr/local/bin/perl

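## Normalize and tokenize a UTF-8 text file line by line: unify typographic
## quotes, put spaces around punctuation, collapse whitespace, and strip
## BOM/directional marks. The file name is the first argument; output goes
## to STDOUT.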

use strict;
use warnings;

# Normalize and tokenize a single line of text.
#
# Takes one argument, the line with its trailing newline already removed,
# and returns the normalized string. The caller's value is not modified.
sub normalize_line {
	my ($line) = @_;

	# Conversion of Arabic-script punctuation to ASCII (currently disabled)
# 	$line =~ s/\x{060C}/ , /g;
# 	$line =~ s/\x{061F}/ ? /g;
# 	$line =~ s/\x{061B}/ ; /g;
# 	$line =~ s/\x{06D4}/ . /g;
# 	$line =~ s/\x{066A}/ % /g;
# 	$line =~ s/\x{066B}/./g;
# 	$line =~ s/\x{066C}/,/g;

	$line =~ s/\x{2026}//g; # Drop the horizontal ellipsis character
	$line =~ s/\.+/./g;     # Collapse a run of periods into a single one

	# Map typographic single/double quotes to their ASCII equivalents
	$line =~ s/[\x{2018}\x{2019}\x{201A}\x{201B}]/'/g;
	$line =~ s/[\x{201C}\x{201D}\x{201E}\x{201F}]/"/g;

	# Put spaces around parentheses, '?', '|', '"' and '!'
	$line =~ s/([\)\(\?\|\"\!])/ $1 /g;

	# Detach a hyphen that directly follows a digit
	$line =~ s/(\d)(\-)/$1 $2 /g;

	# Apostrophes: split off a trailing 's, a word-final apostrophe, and a
	# word-initial apostrophe
	$line =~ s/(\w)('s\s)/$1 $2/g;
	$line =~ s/(\w)(')\s/$1 $2 /g;
	$line =~ s/(^|\s)(')(\w)/$1 $2 $3/g;

	$line =~ s/(\d+) , (000)/$1$2/g; # Undo earlier mistake in normalization

	# Put spaces around a comma or period unless it sits between digits
	# (the first rule looks at the preceding character, the second at the
	# following one)
	$line =~ s/([^0-9])([,\.])/$1 $2 /g;
	$line =~ s/([,\.])([^0-9])/ $1 $2/g;

	# Remove the BOM and directional embedding marks BEFORE cleaning up
	# whitespace: these characters are not matched by \s, so removing them
	# afterwards could leave a leading/trailing space or a double space.
	$line =~ s/[\x{FEFF}\x{202B}\x{202C}\x{202A}]//g;

	# Collapse whitespace runs and trim both ends
	$line =~ s/\s+/ /g;
	$line =~ s/\s+$//;
	$line =~ s/^\s+//;

	return $line;
}

binmode(STDOUT,":utf8");
my $fileName = shift;
defined $fileName or die("Usage: $0 <file>\n");

# Lexical filehandle; report the OS error so the failure reason is visible
open(my $in, "<:encoding(utf8)", $fileName) or die("Couldn't open the file $fileName: $!\n");

while (my $line = <$in>) {
	chomp $line;
	print normalize_line($line), "\n";
# 	print lc(normalize_line($line)), "\n";
}
close $in;