Algo-Quicksheet/chapterString.tex at master · algorhythms/Algo-Quicksheet · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
% !TEX root = algo-quicksheet.tex
\chapter{String}

\section{Palindrome}
\subsection{Palindrome anagram}
\begin{itemize}
\item \rih{Test palindrome anagram.} Char counter, number of odd count should $\leq 0$.
\item \rih{Count palindrome anagram.} See Section-\ref{N_objects_K_types}.
\item \rih{Construct palindrome anagram.} Construct all palindrome anagrams given a string \pyinline{s}.
\end{itemize}

\runinhead{Core Clues:}
\begin{enumerate}
\item Different choices of char $\Ra$ backtracking, choose the next from the char counters of \pyinline{s}.
\item To avoid loop $\Ra$ jump parent char
\end{enumerate}

\begin{python}
def backtrack(self, s, counters, pi, cur, ret):
  if len(cur) == len(s):
    ret.append(cur)
    return

  for k in counters.keys():
    # jump the parent
    if k != pi and counters[k] > 0:
      for n in range(1, counters[k]/2+1):
        counters[k] -= n*2
        self.backtrack(s, counters, k, k*n+cur+k*n, ret)
        counters[k] += n*2
\end{python}

Jump within the iterations of choice to avoid dead loop.

How to handle odd counter? Check outside the backtrack, and potentially raise error.

\subsection{Number}
\runinhead{Next Permutation.} Given a string $n$ representing an int, return the closest int (not including itself), which is a palindrome. For example $n = `123'$, return $`121'$.

\begin{enumerate}
\item Palindrome $\Ra$ Mirror the left half of $n$.
\item 19997 has two candidates 19991 and 20002 $\Ra$ for the half, $+/-1$ for carry/borrow
\item 1000 has two candidates 999 and 1001 $\Ra$ More/less digits of $n$ $\Ra$ checking numbers $10..0, 9..9$
\end{enumerate}
\begin{python}
def nearestPalindromic(self, s: str) -> str:
  l = len(s)
  odd_l = l & 1
  if odd_l:
    half = s[:l//2] + s[l//2]
  else:
    half = s[:l//2]

  def mir(half: str) -> str:
    if not odd_l:
      return half + half[::-1]
    else:
      return half + half[::-1][1:]

  # candidates
  cands = {
    mir(str(int(half) - 1)),
    mir(half),
    mir(str(int(half) + 1)),
  }
  cands |= {
    str(10 ** l + 1),
    str(10 ** (l - 1) - 1)
  }
  cands.discard(s)
  return min(
    cands,
    key=lambda e: (abs(int(s) - int(e)), int(e))
  )
\end{python}
\section{Anagram}
\runinhead{Group of strings by anagram.}

Using a frequency vector
\begin{python}
class Solution:
  def groupAnagrams(self, strs):
    # key: 26-tuple counts; value: list of words
    d = defaultdict(list)
    for s in strs:
      cnt = [0] * 26
      for ch in s:
        cnt[ord(ch) - ord('a')] += 1
      d[tuple(cnt)].append(s)
    return list(d.values())
\end{python}

\section{KMP}
Find the pattern $p$ in string $s$ within complexity of $O(|P|+|S|)$.

\subsection{Prefix matching suffix table}
Intuition: when a mismatch happens at $p_i$ vs $s_i$, don't restart from $p_0$; instead jump to the next best candidate $i$ right after the matched prefix \& suffix, keeping the already verified prefix.

Let $L_i$ be the length of the longest proper prefix that matches a suffix ending at $p_i$.
\begin{enumerate}
\item A proper prefix is a prefix that is not equal to the whole string itself. Proper ensures that $L_i < i+1$.
\item Need to maintain the longest proper prefix of a substring that is also a suffix of it.
\end{enumerate}

\begin{table}[h!]
\centering
\begin{tabular}{c|ccccccc}
  \toprule
  \textbf{i}   & 0 & 1 & 2 & 3 & 4 & 5 & 6 \\
  \midrule
  \textbf{$p_i$} & A & B & C & D & A & B & D \\
  \textbf{$L_i$} & 0 & 0 & 0 & 0 & 1 & 2 & 0 \\
  \bottomrule
\end{tabular}
\end{table}
\begin{python}
def kmp_lps(p):
    L = [0] * len(p)
    i = 1
    pre = 0
    while i < len(p):
        if p[i] == p[pre]:
            L[i] = pre + 1
            pre += 1
            i += 1
        elif pre:  # fallback
            pre = L[pre - 1]
        else:  # no fallback
            L[i] = 0
            i += 1

    return L
\end{python}
Time complexity:
\begin{itemize}
\item From code itself it appears to be $O(|P|^2)$.
\item Define $\Delta = i - pre$.
\item $i$ never goes backward - $i$ either forwards of stays.
\item Any time $i$ doesn’t move forward, $\Delta$ must rise
\item $j$ is bounded by $O(|P|)$ and $\Delta$ is bounded by $O(|P|)$
\end{itemize}
\subsection{Searching algorithm}
\begin{figure}[]
\centering
\subfloat{\includegraphics[width=0.8\linewidth]{kmp_presuffix}}
\caption{KMP example}
\label{fig:kmp_presuffix}
\end{figure}


\begin{python}
def kmp_search(p, s):
    L = kmp_lps(p)
    ret = []
    i = 0
    j = 0
    while j < len(s):
        if p[i] == s[j]:
            i += 1
            j += 1
            if i == len(p):
                ret.append(j - i)
                i = L[i - 1]
        elif i:
            i = L[i - 1]
        else:
            j += 1

    return ret
\end{python}
Time compelxity:
\begin{itemize}
\item Define $\Delta = j - i$.
\item $j$ never goes backward
\item Any time $j$ doesn’t move forward, $\Delta$ must rise
\item $j$ is bounded by $O(|S|)$ and $\Delta$ is bounded by $O(|P|+|S|)$
\end{itemize}
\subsection{Applications}
\begin{enumerate}
\item Find needle in haystack.
\item Shortest palindrome
\end{enumerate}
\subsection{Subsequence}
Given a string $s$ and an array of strings $words$, return the number of $words_i$ that is a subsequence of $s$. For example, input: \pyinline{s = "abcde"}, \pyinline{words = ["a","bb","acd","ace"]}

\rih{Core clues:}
\begin{enumerate}
\item Test whether one string is another's subsequence is straightforwad, how to test multiple strings is one's subsequence? $\Ra$ consume the multiple strings in one iteartion.
\item Each character can be a candidate $\Ra$ char map.
\item Each $words_i$ is only match once $\Ra$ iterator
\end{enumerate}
\begin{python}
def numMatchingSubseq(self, S, words) -> int:
  """
  Linear O(|S| + sum(|word|))
  no need to if-check with HashMap + Iterator
  """
  itr_lsts = defaultdict(list)
  for w in words:
    itr_lsts[w[0]].append(iter(w[1:]))

  for c in S:
    itrs = itr_lsts.pop(c, [])
    for itr in itrs:
      ch = next(itr, None)
      itr_lsts[ch].append(itr)

  return len(itr_lsts[None])
\end{python}
Note \pyinline{itr_lsts} can be short formed as \pyinline{itrss}