@@ -2,6 +2,7 @@ package main
22
33import (
44 "bufio"
5+ _ "embed"
56 "fmt"
67 "io"
78 "log"
@@ -12,14 +13,21 @@ import (
1213 "golang.org/x/text/unicode/runenames"
1314)
1415
// helpText holds the contents of README.md, embedded into the binary at build
// time via the embed package; main prints it when --help is given.
16+ //go:embed README.md
17+ var helpText string
18+
// main parses the command-line flags and then walks every file argument,
// printing one line per reported rune: optionally the byte offset, the line
// number, the U+XXXX codepoint and the character itself, followed by the
// rune's Unicode name.
// NOTE(review): this is a diff view; the lines hidden between hunks (version
// handling, opening the file, the rune-reading call) are not visible here, so
// the comments below describe only what the visible lines show.
1519func main () {
1620
17- var control = pflag .Bool ("control" , false , "Include control characters" )
1821 var ascii = pflag .Bool ("ascii" , false , "Include ASCII characters" )
1922 var codepoint = pflag .Bool ("codepoint" , true , "Print the U+XXXX codepoint" )
23+ var line = pflag .Bool ("line" , true , "Print the line number" )
2024 var offset = pflag .Bool ("offset" , true , "Print the offset" )
2125 var char = pflag .Bool ("char" , false , "Print the character itself" )
22- var version = pflag .Bool ("version" , false , "Print version information" )
26+
27+ var first = pflag .Bool ("first" , false , "Only print the first occurrence of each character" )
28+
29+ var help = pflag .Bool ("help" , false , "Detailed help" )
30+ var version = pflag .Bool ("version" , false , "Version info" )
2331
2432 pflag .Parse ()
2533
@@ -28,11 +36,20 @@ func main() {
2836 return
2937 }
3038
// --help prints the embedded help text and returns without reading any input.
39+ if * help {
40+ fmt .Printf ("%s\n " , helpText )
41+ return
42+ }
43+
3144 args := pflag .Args ()
// With no file arguments, the new code prints a usage line plus the flag
// defaults and returns.
3245 if len (args ) == 0 {
33- args = []string {"-" }
46+ fmt .Printf ("Usage: uniwhat [options] file ...\n \n " )
47+ pflag .PrintDefaults ()
48+ return
3449 }
3550
// Runes already reported; consulted only when --first is set.
// NOTE(review): it is created once, outside the per-file loop, so "first
// occurrence" is tracked across all input files together — confirm intended.
51+ firstMap := make (map [rune ]bool )
52+
3653 for _ , arg := range args {
// A "-" argument is rewritten to the /dev/stdin path.
3754 if arg == "-" {
3855 arg = "/dev/stdin"
@@ -46,9 +63,10 @@ func main() {
4663 }
// NOTE(review): this defer appears to sit inside the per-argument loop, so it
// fires only when main returns; every opened file stays open until then.
4764 defer file .Close ()
4865
49- reader := bufio .NewReader (file )
// Buffered reader with a 1024*1024-byte (1 MiB) buffer.
66+ reader := bufio .NewReaderSize (file , 1024 * 1024 )
5067
// pos accumulates the byte sizes of the runes read so far; lineNum starts at 1.
5168 var pos int = 0
69+ var lineNum int = 1
5270
5371 // Loop to read runes one by one
5472 for {
@@ -59,21 +77,32 @@ func main() {
5977 }
6078 log .Fatalf ("Error reading rune: %v" , err )
6179 }
62- if r < 0x1F && ! * control {
63- pos += rsize
64- continue // Skip control characters if --control is not set
// pos is advanced before any filtering, so from here on it is the offset just
// past the current rune; the offset print below subtracts rsize again.
80+ pos += rsize
81+
// NOTE(review): lineNum is bumped before anything is printed, so when --ascii
// is set a '\n' rune itself is reported with the following line's number.
82+ if r == '\n' {
83+ lineNum ++
6584 }
66- if r <= 0x7E && ! * ascii {
67- pos += rsize
85+
// Skips 0x20-0x7E plus tab (0x09), LF (0x0A) and CR (0x0D); other control
// characters and 0x7F are not covered by this test.
86+ if ! * ascii && (( r >= 0x20 && r <= 0x7E ) || r == 0x09 || r == 0x0A || r == 0x0D ) {
6887 continue // Skip ASCII characters if --ascii is not set
6988 }
89+
// With --first, a rune already recorded in firstMap is skipped; otherwise it
// is recorded and reported.
90+ if * first {
91+ if _ , exists := firstMap [r ]; exists {
92+ continue
93+ }
94+ firstMap [r ] = true
95+ }
96+
// An empty name from runenames.Name is replaced by the "<unknown>" placeholder.
7097 name := runenames .Name (r )
7198 if name == "" {
7299 name = "<unknown>"
73100 }
74101 if * offset {
75- // Note: Getting the exact byte offset of the rune is complex; this is a placeholder
76- fmt .Printf ("%08x " , pos )
// pos - rsize is the byte offset at which the current rune starts.
102+ fmt .Printf ("%08x " , pos - rsize )
103+ }
104+ if * line {
105+ fmt .Printf ("%6d " , lineNum )
77106 }
78107 if * codepoint {
79108 fmt .Printf ("U+%04X " , r )
@@ -82,8 +111,6 @@ func main() {
82111 fmt .Printf ("%c " , r )
83112 }
84113 fmt .Printf ("%s\n " , name )
85-
86- pos += rsize
87114 }
88115 }
89116}
0 commit comments