Skip to content

Commit 13e3fa3

Browse files
committed
Encoding
1 parent 201ff08 commit 13e3fa3

9 files changed

Lines changed: 197 additions & 75 deletions

File tree

.DS_Store

-6 KB
Binary file not shown.

secretary/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
SECRETARY
22
node_modules
3+
4+
freq

secretary/utils/encode/sec32.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package encode
2+
3+
import "github.com/codeharik/secretary/utils"
4+
5+
// SEC32KeyMap lists, for each code index (the row position), the ASCII bytes
// that encode to that index. The first byte of a row is the one produced when
// an index is decoded back to ASCII (the [0] lookups in IndexToAscii32 and
// Sec32ToAscii). Rows with several bytes make the encoding lossy: upper and
// lower case, and the extra letters or punctuation in a row, all collapse onto
// the same index. Row 0 is also where every byte not listed here ends up,
// because ASCII32Index entries that init never assigns stay 0.
var SEC32KeyMap = [][]byte{
6+
/*00*/ {'~'},
7+
/*01*/ {'a', 'A'},
8+
/*02*/ {'p', 'P', 'b', 'B', '+'},
9+
/*03*/ {'c', 'C', 's', 'S', ':'},
10+
/*04*/ {'d', 'D', '/'},
11+
/*05*/ {'e', 'E', '='},
12+
/*06*/ {'i', 'I'},
13+
/*07*/ {'f', 'F', '"', '\''},
14+
/*08*/ {'g', 'G', 'j', 'J', 'x', 'X', 'z', 'Z'},
15+
/*09*/ {'h', 'H', '%'},
16+
/*10*/ {'o', 'O'},
17+
/*11*/ {'k', 'K', 'q', 'Q', '?', '!', '\\', '|'},
18+
/*12*/ {'l', 'L', '&', '@'},
19+
/*13*/ {'m', 'M', '-'},
20+
/*14*/ {'n', 'N', '*'},
21+
/*15*/ {'t', 'T'},
22+
/*16*/ {'r', 'R', '^'},
23+
/*17*/ {'u', 'U', 'v', 'V', '#'},
24+
/*18*/ {'w', 'W', 'y', 'Y', '$'},
25+
/*19*/ {'0'},
26+
/*20*/ {'1'},
27+
/*21*/ {'2'},
28+
/*22*/ {'3'},
29+
/*23*/ {'4'},
30+
/*24*/ {'5'},
31+
/*25*/ {'6'},
32+
/*26*/ {'7'},
33+
/*27*/ {'8'},
34+
/*28*/ {'9'},
35+
/*29*/ {'(', '[', '<', '{'},
36+
/*30*/ {')', ']', '>', '}'},
37+
/*31*/ {'_', '.', ',', ';', ' ', '\n'},
38+
}
39+
40+
// SEC32 is the 32-character output alphabet: the byte at position i is the
// symbol emitted for code index i (see IndexToSec32 and AsciiToSec32).
const SEC32 = `-apcdeifghoklmntruw0123456789QR_`
41+
42+
// Byte-indexed lookup tables that init fills in. Entries init never assigns
// stay 0, so an unknown byte resolves to code index 0.
var (
43+
// ASCII32Index maps an ASCII byte to the SEC32KeyMap row that contains it.
ASCII32Index = [256]byte{}
44+
// SEC32Index maps a byte of the SEC32 alphabet to its position in SEC32.
SEC32Index = [256]byte{}
45+
)
46+
47+
func init() {
48+
for i := range ASCII32Index {
49+
ASCII32Index[i] = 0
50+
SEC32Index[i] = 0
51+
}
52+
for index, chars := range SEC32KeyMap {
53+
SEC32Index[SEC32[index]] = byte(index)
54+
for _, c := range chars {
55+
ASCII32Index[c] = byte(index)
56+
}
57+
}
58+
}
59+
60+
func AsciiToSec32(str string) string {
61+
return string(utils.Map(
62+
[]byte(str),
63+
func(c byte) byte {
64+
return SEC32[ASCII32Index[c]]
65+
}))
66+
}
67+
68+
func Ascii32ToIndex(str string) []byte {
69+
return utils.Map(
70+
[]byte(str),
71+
func(c byte) byte {
72+
return ASCII32Index[c]
73+
})
74+
}
75+
76+
func IndexToAscii32(arr []byte) string {
77+
return string(utils.Map(
78+
arr,
79+
func(i byte) byte {
80+
return SEC32KeyMap[i][0]
81+
}))
82+
}
83+
84+
func Sec32ToAscii(str string) string {
85+
return string(utils.Map(
86+
[]byte(str),
87+
func(c byte) byte {
88+
return SEC32KeyMap[SEC32Index[c]][0]
89+
}))
90+
}
91+
92+
func Sec32ToIndex(str string) []byte {
93+
return utils.Map(
94+
[]byte(str),
95+
func(c byte) byte {
96+
return SEC32Index[c]
97+
})
98+
}
99+
100+
func IndexToSec32(arr []byte) string {
101+
return string(utils.Map(
102+
arr,
103+
func(i byte) byte {
104+
return SEC32[i]
105+
}))
106+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
package encode
Lines changed: 49 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
package sec64
1+
package encode
22

33
import (
4-
"fmt"
54
"strings"
5+
6+
"github.com/codeharik/secretary/utils"
67
)
78

89
var (
@@ -20,104 +21,83 @@ var (
2021
**/
2122

2223
// ABCDEFGHIJKLMNOPQRSTUVWXYZ | [{}]
23-
ASCII = []byte(`~abcdefghijklmnopqrstuvwxyz0123456789=+-*/\%^<>!?@#$&(),;:'"_. N`)
24-
SEC64 = []byte(`-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWX._+`)
24+
ASCII64 = []byte(`~abcdefghijklmnopqrstuvwxyz0123456789=+-*/\%^<>!?@#$&(),;:'"_. N`)
25+
SEC64 = []byte(`-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWX._+`)
2526
// 0123456789012345678901234567890123456789012345678901234567890123
2627
)
2728

28-
type Sec64 struct {
29-
index byte
30-
char byte
31-
}
32-
3329
var (
34-
Ascii2Sec [256]Sec64
35-
Sec2Ascii [256]Sec64
30+
ASCII64Index = [256]byte{}
31+
SEC64Index = [256]byte{}
3632
)
3733

3834
func init() {
39-
ASCII[63] = '\n'
35+
ASCII64[63] = '\n'
4036

41-
for i := range Ascii2Sec {
42-
Sec2Ascii[i] = Sec64{index: 0, char: '~'}
43-
Ascii2Sec[i] = Sec64{index: 0, char: '-'}
37+
for i := range ASCII64Index {
38+
ASCII64Index[i] = 0
39+
SEC64Index[i] = 0
4440
}
45-
for i := 1; i < 63; i++ {
46-
Ascii2Sec[ASCII[i]] = Sec64{index: byte(i), char: SEC64[i]}
47-
Sec2Ascii[SEC64[i]] = Sec64{index: byte(i), char: ASCII[i]}
41+
42+
for i := 1; i < 64; i++ {
43+
ASCII64Index[ASCII64[i]] = byte(i)
44+
SEC64Index[SEC64[i]] = byte(i)
4845
}
4946
for c := 'A'; c <= 'Z'; c++ {
50-
Ascii2Sec[c] = Sec64{index: byte(c - 'A' + 1), char: SEC64[c-'A'+1]}
47+
ASCII64Index[c] = byte(c - 'A' + 1)
5148
}
5249
brackets := map[rune]rune{'[': '(', ']': ')', '{': '(', '}': ')', '|': '\\'}
5350
for k, v := range brackets {
54-
Ascii2Sec[k] = Ascii2Sec[v]
55-
}
56-
57-
Sec2Ascii['+'] = Sec64{index: 63, char: '\n'}
58-
Ascii2Sec['\n'] = Sec64{index: 63, char: '+'}
59-
60-
for i := 0; i < 128; i++ {
61-
c := rune(i)
62-
fmt.Printf(
63-
"%3d %-7q A2S: %-4d %-4q S2A: %-4d %-4q\n",
64-
i, c,
65-
Ascii2Sec[c].index, Ascii2Sec[c].char, Sec2Ascii[c].index, Sec2Ascii[c].char,
66-
)
51+
ASCII64Index[k] = ASCII64Index[v]
6752
}
68-
fmt.Println()
69-
fmt.Println(string(SEC64))
70-
fmt.Println(string(ASCII))
7153
}
7254

7355
func AsciiToSec64(str string) string {
74-
enc := make([]byte, len(str))
75-
for i := 0; i < len(str); i++ {
76-
enc[i] = Ascii2Sec[str[i]].char
77-
}
78-
return string(enc)
56+
return string(utils.Map(
57+
[]byte(str),
58+
func(c byte) byte {
59+
return SEC64[ASCII64Index[c]]
60+
}))
7961
}
8062

81-
func AsciiToIndex(str string) []byte {
82-
enc := make([]byte, len(str))
83-
for i := 0; i < len(str); i++ {
84-
enc[i] = Ascii2Sec[str[i]].index
85-
}
86-
return enc
63+
func Ascii64ToIndex(str string) []byte {
64+
return utils.Map(
65+
[]byte(str),
66+
func(c byte) byte {
67+
return ASCII64Index[c]
68+
})
8769
}
8870

89-
func IndexToAscii(indexes []byte) string {
90-
str := make([]byte, len(indexes))
91-
for i := 0; i < len(indexes); i++ {
92-
str[i] = ASCII[indexes[i]]
93-
// fmt.Printf("%-3d %-3d %-3q %-3d\n", i, indexes[i], string(str[i]), str[i])
94-
}
95-
fmt.Println()
96-
return string(str)
71+
func IndexToAscii64(indexes []byte) string {
72+
return string(utils.Map(
73+
indexes,
74+
func(i byte) byte {
75+
return ASCII64[i]
76+
}))
9777
}
9878

9979
func Sec64ToAscii(str string) string {
100-
dec := make([]byte, len(str))
101-
for i := 0; i < len(str); i++ {
102-
dec[i] = Sec2Ascii[str[i]].char
103-
}
104-
return string(dec)
80+
return string(utils.Map(
81+
[]byte(str),
82+
func(c byte) byte {
83+
return ASCII64[SEC64Index[c]]
84+
}))
10585
}
10686

10787
func Sec64ToIndex(str string) []byte {
108-
dec := make([]byte, len(str))
109-
for i := 0; i < len(str); i++ {
110-
dec[i] = Sec2Ascii[str[i]].index
111-
}
112-
return dec
88+
return utils.Map(
89+
[]byte(str),
90+
func(c byte) byte {
91+
return SEC64Index[c]
92+
})
11393
}
11494

11595
func IndexToSec64(indexes []byte) string {
116-
str := make([]byte, len(indexes))
117-
for i := 0; i < len(indexes); i++ {
118-
str[i] = SEC64[indexes[i]]
119-
}
120-
return string(str)
96+
return string(utils.Map(
97+
indexes,
98+
func(i byte) byte {
99+
return SEC64[i]
100+
}))
121101
}
122102

123103
func AsciiToSec64Expand(str string) string {
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package sec64
1+
package encode
22

33
import (
44
"bytes"
@@ -45,15 +45,15 @@ func TestEncodingDecoding(t *testing.T) {
4545

4646
for _, tt := range tests {
4747
asciiToSec64 := AsciiToSec64(tt.input)
48-
asciiToIndex := AsciiToIndex(tt.input)
48+
asciiToIndex := Ascii64ToIndex(tt.input)
4949
sec64ToAscii := Sec64ToAscii(asciiToSec64)
5050
sec64ToIndex := Sec64ToIndex(asciiToSec64)
5151
// packedIndexes := Pack8to6(asciiToIndex)
5252
// unpackedIndexes := Unpack6to8(packedIndexes)
5353
// indexToAscii := IndexToAscii(unpackedIndexes)
5454
// indexToSec64 := IndexToSec64(unpackedIndexes)
55-
encoded := Pack8to6(AsciiToIndex(tt.input))
56-
decodedAscii := IndexToAscii(Unpack6to8(encoded))
55+
encoded := Pack8to6(Ascii64ToIndex(tt.input))
56+
decodedAscii := IndexToAscii64(Unpack6to8(encoded))
5757
decodedSec64 := IndexToSec64(Unpack6to8(encoded))
5858

5959
// utils.Log(
@@ -96,7 +96,7 @@ func binStr(data []byte, bytemode, spacemode, debugmode bool) string {
9696

9797
for _, b := range data {
9898
if debugmode {
99-
sb.WriteString(fmt.Sprintf(format+"(%q) ", b, Sec2Ascii[b&0b00111111]))
99+
sb.WriteString(fmt.Sprintf(format+"(%q) ", b, ASCII64[SEC64Index[b&0b00111111]]))
100100
} else {
101101
sb.WriteString(fmt.Sprintf(format, b))
102102
}

secretary/utils/file/filechunk_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ From that day on, Luma became a storyteller, sharing her experiences and encoura
4646

4747
file.Write([]byte(fmt.Sprintf("\n--->Sec64Expand %d\n", n)))
4848

49-
n, err = file.Write([]byte(sec64.AsciiToSec64(text)))
49+
n, err = file.Write([]byte(sec64.StringToSec64(text)))
5050
if err != nil {
5151
return "", err
5252
}

secretary/utils/ngram/ngram.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package ngram
2+
3+
import "strings"
4+
5+
// GenerateNGrams splits text into whitespace-separated words and returns every
// run of n consecutive words, each joined by a single space, in order of
// appearance.
//
// It returns nil when n is not positive or when text has fewer than n words.
func GenerateNGrams(text string, n int) []string {
	// A non-positive n has no meaningful n-gram. Without this guard n == 0
	// produces a list of empty strings and n < 0 slices with a negative upper
	// bound, which panics.
	if n <= 0 {
		return nil
	}

	words := strings.Fields(text)
	if len(words) < n {
		return nil // Not enough words to form a single n-gram.
	}

	// The number of windows is known up front, so allocate exactly once.
	ngrams := make([]string, 0, len(words)-n+1)
	for i := 0; i+n <= len(words); i++ {
		ngrams = append(ngrams, strings.Join(words[i:i+n], " "))
	}
	return ngrams
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package ngram
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
)
7+
8+
func TestNGram(t *testing.T) {
9+
text := "hello world this is a test"
10+
11+
fmt.Println("Bigrams:", GenerateNGrams(text, 2))
12+
fmt.Println("Trigrams:", GenerateNGrams(text, 3))
13+
fmt.Println("Four-grams:", GenerateNGrams(text, 4))
14+
}

0 commit comments

Comments
 (0)