-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathg2p.go
137 lines (128 loc) · 2.99 KB
/
g2p.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package prg2p
import (
"fmt"
"io"
"strings"
)
// G2P is a grapheme-to-phoneme transcriber. It holds the root of a trie
// populated with parsed grapheme-to-phoneme rules; each node can be followed
// through its right-hand and left-hand branches while matching a word. Its
// Transcribe method takes individual words and returns their transcriptions.
type G2P struct {
// tree is the root node of the rule trie. Transcribe reports an error when
// it is nil.
tree *trieNode
}
// newG2P wraps the rule trie rooted at t in a freshly allocated G2P and
// returns a pointer to it. t is stored as given, without validation.
func newG2P(t *trieNode) *G2P {
	return &G2P{tree: t}
}
// Load builds a G2P from the rules read from r. A new interpreter scans r,
// the scanned result is turned into a trie by newTree, and that trie becomes
// the transcriber's rule tree. If scanning fails, the scan error is returned
// unchanged together with a nil G2P.
func Load(r io.Reader) (*G2P, error) {
	rules := newInterpreter()
	if err := rules.scan(r); err != nil {
		return nil, err
	}
	return newG2P(newTree(rules)), nil
}
// Transcribe converts word w from its graphemic to its phonemic form.
//
// The word is lower-cased and consumed from left to right. At every position
// the rule trie is searched with rightVars; the matched node supplies the
// output variants for that segment and the number of characters (nchars) to
// advance by. Variants of consecutive segments are joined with single spaces.
//
// When all is true, every combination of the segment variants is returned.
// When all is false, only the first combination is returned, as a
// one-element slice.
//
// An error (with an empty slice) is returned when the transcriber has no rule
// tree, when no rule matches at some position, when a matched rule carries no
// output variants, or when no segment was produced at all (e.g. w is empty).
func (g *G2P) Transcribe(w string, all bool) ([]string, error) {
	if g == nil || g.tree == nil {
		return []string{}, fmt.Errorf("trie node is nil")
	}

	w = strings.ToLower(w)
	nchars := len([]rune(w))

	var trans [][]string
	for i := 0; i < nchars; {
		t := g.rightVars(w, i, i-1, g.tree)
		if t == nil {
			return []string{}, fmt.Errorf("failed to transcribe %s", w)
		}
		if len(t.output) == 0 {
			// A segment without variants makes the combined result empty,
			// which previously caused a panic when slicing out the first hit.
			return []string{}, fmt.Errorf("failed to transcribe %s: rule at position %d has no output", w, i)
		}

		variants := t.output
		if !all {
			// Only the first combination is wanted, and it is always built
			// from the first variant of every segment. Dropping the other
			// variants here keeps the expansion linear instead of exponential.
			variants = []string{t.output[0]}
		}
		trans = append(trans, variants)
		i += t.nchars
	}

	out, err := g.all(trans, 0)
	if err != nil {
		return []string{}, err
	}
	return out, nil
}
// all expands the per-segment variant lists trans[i:] into every possible
// combination. Each combination picks one variant per segment and joins the
// picks with single spaces. Combinations are ordered with the variants of
// earlier segments varying slowest.
//
// The returned slice is always freshly allocated, so callers may modify it
// without affecting the slices held in trans.
//
// An error (with an empty slice) is returned when trans is empty or when i is
// not a valid index into trans.
func (g *G2P) all(trans [][]string, i int) ([]string, error) {
	if len(trans) == 0 {
		return []string{}, fmt.Errorf("no transcription variants offered")
	}
	if i < 0 || i >= len(trans) {
		// Without this check an index past the end recursed forever and a
		// negative one panicked.
		return []string{}, fmt.Errorf("variant index %d out of range", i)
	}

	last := len(trans) - 1
	// Start from a copy of the final segment so the result never aliases the
	// caller's data, then prepend the earlier segments one at a time.
	result := append([]string(nil), trans[last]...)
	for seg := last - 1; seg >= i; seg-- {
		combined := make([]string, 0, len(trans[seg])*len(result))
		for _, head := range trans[seg] {
			for _, tail := range result {
				combined = append(combined, head+" "+tail)
			}
		}
		result = combined
	}
	return result, nil
}
// rightVars walks the right-hand branches of the rule trie, starting at node
// trie, against the runes of w from position frontIdx onward. backIdx is
// passed through unchanged to leftVars for left-hand matching.
//
// Candidates are tried in this order and the first non-nil one wins:
//  1. the child keyed by the rune at frontIdx, searched recursively with
//     frontIdx advanced by one;
//  2. when frontIdx is exactly at the end of the word, the child keyed by
//     "$", resolved through leftVars;
//  3. the current node resolved through leftVars;
//  4. the current node itself, if its nchars is non-zero.
//
// It returns nil when none of these yields a node.
func (g *G2P) rightVars(w string, frontIdx, backIdx int, trie *trieNode) *trieNode {
	wRune := []rune(w)

	if frontIdx >= 0 && frontIdx < len(wRune) {
		if next, ok := trie.right[string(wRune[frontIdx])]; ok {
			// Advance only for the recursive call. Incrementing frontIdx in
			// place made the "$" check below fire for this node whenever the
			// deeper match failed on the last rune of the word.
			if hit := g.rightVars(w, frontIdx+1, backIdx, next); hit != nil {
				return hit
			}
		}
	}

	if frontIdx == len(wRune) {
		if end, ok := trie.right["$"]; ok {
			if hit := g.leftVars(w, backIdx, end); hit != nil {
				return hit
			}
		}
	}

	if hit := g.leftVars(w, backIdx, trie); hit != nil {
		return hit
	}
	if trie.nchars != 0 {
		return trie
	}
	return nil
}
// leftVars walks the left-hand branches of the rule trie, starting at node
// trie, against the runes of w from position backIdx backwards. backIdx is
// the index of the rune immediately to the left of the part already matched;
// -1 means the start of the word has been reached.
//
// Candidates are tried in this order and the first one found wins:
//  1. the child keyed by the rune at backIdx, searched recursively with
//     backIdx moved one position to the left;
//  2. when backIdx is -1, the child keyed by "$", if its nchars is non-zero;
//  3. the current node itself, if its nchars is non-zero.
//
// It returns nil when none of these yields a node.
func (g *G2P) leftVars(w string, backIdx int, trie *trieNode) *trieNode {
	wRune := []rune(w)

	if backIdx >= 0 && backIdx < len(wRune) {
		// The left context is the rune at backIdx itself. Reading it from a
		// position mirrored around the word's length picked unrelated runes
		// for any word longer than two characters.
		if prev, ok := trie.left[string(wRune[backIdx])]; ok {
			// Step left only for the recursive call, so that the "$" check
			// below still sees this node's own position.
			if hit := g.leftVars(w, backIdx-1, prev); hit != nil {
				return hit
			}
		}
	}

	if backIdx == -1 {
		if start, ok := trie.left["$"]; ok && start.nchars != 0 {
			return start
		}
	}

	if trie.nchars != 0 {
		return trie
	}
	return nil
}