[LeetCode 30] Substring with Concatenation of All Words

Hash Table Two Pointers String

You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.

For example, given: s: "barfoothefoobarman" words: ["foo", "bar"]

You should return the indices: [0,9].
(order does not matter).

Diffculty
Hard

Similar Problems
[LeetCode ] Minimum Window Substring Hard

Analysis

// 44ms, 87.2% class Solution { public: vector findSubstring(string S, vector &L) { unordered_map needFind; // L中单词出现的次数 for(int i = 0; i < L.size(); i++) needFind[L[i]]++; int n = (int)S.length(); int wordLen = L[0].size(); vector res; for(int i = 0; i < wordLen; ++i){ // 为了不遗漏从s的每一个位置开始的子串，第一层循环为单词的长度 unordered_map hasFound; // 当前窗口中单词出现的次数 int winStart = i, cnt = 0; // winStart为窗口起始位置, cnt为当前窗口中的单词数目 for(int winEnd = i; winEnd <= n - wordLen; winEnd += wordLen){ string curWword = S.substr(winEnd, wordLen); // 之前的窗口为[winStart, winEnd)

            if(needFind.find(curWword) != needFind.end()){ // 如果这个word是字典中的一员
                hasFound[curWword]++; // 把当前找到的++

                if(hasFound[curWword] <= needFind[curWword]) // 如果仍然小于要找的数量，加到window中去，即 ++cnt
                    cnt++;
                else{
                // 当前的单词在L中，但是它已经在窗口中出现了相应的次数，不应该加入窗口
                // 此时，应该把窗口起始位置像右移动到，该单词第一次出现的位置的下一个单词位置
                    for(int k = winStart; ; k += wordLen){
                        string tmpstr = S.substr(k, wordLen);
                        hasFound[tmpstr]--;
                        if(tmpstr == curWword){
                            winStart = k + wordLen;
                            break;
                        }
                        cnt--;
                    }
                }

                if(cnt == L.size())
                    res.push_back(winStart);
            }
            else{
            // 发现不在L中的单词， 则前面的window清除，从下一个word的位置重新开始
                winStart = winEnd + wordLen;
                hasFound.clear();
                cnt = 0;
            }
        }
    }
    return res;
}

};

// travel all the words combinations to maintain a window // there are wl(word len) times travel // each time, n/wl words, mostly 2 times travel for each word // one left side of the window, the other right side of the window // so, time complexity O(wl 2 N/wl) = O(2N)

// 52 ms, 78.45% class Solution{ public: vector findSubstring(string S, vector &L) { vector ans; int n = S.size(), cnt = L.size(); if (n <= 0 || cnt <= 0) return ans;

    // init word occurence
    unordered_map<string, int> dict;
    for (int i = 0; i < cnt; ++i) ++dict[L[i]];

    // travel all sub string combinations
    int wl = L[0].size();
    for (int i = 0; i < wl; ++i) {
        int left = i, count = 0;
        unordered_map<string, int> tdict;
        for (int j = i; j <= n - wl; j += wl) {
            string str = S.substr(j, wl);
            // a valid word, accumulate results
            if (dict.count(str)) {
                ++tdict[str];
                if (tdict[str] <= dict[str])
                    ++count;
                else {
                    // a more word, advance the window left side possiablly
                    while (tdict[str] > dict[str]) {
                        string str1 = S.substr(left, wl);
                        tdict[str1]--;
                        if (tdict[str1] < dict[str1]) count--;
                        left += wl;
                    }
                }
                // come to a result
                if (count == cnt) {
                    ans.push_back(left);
                    // advance one word
                    tdict[S.substr(left, wl)]--;
                    count--;
                    left += wl;
                }
            }
            // not a valid word, reset all vars
            else {
                tdict.clear();
                count = 0;
                left = j + wl;
            }
        }
    }

    return ans;
}

};

思路：

和strStr那题的双指针解法类似。关键在于如何判断以任意i起始的S的substring是否整个L的concatenation。这里显然要用到hash table。由于L中可能存在重复的word，所以hash table的key = word，val = count of the word。

在建立好L的hash table后，对每个S[i]进行检查。这里的一个技巧建立一个新的hash table记录已经找到的word。因为L的hash table需要反复利用，不能被修改，并且如果以hash table作为参数进行值传递的化，时间空间消耗都很大。

// 732ms, 39.22% class Solution { public: vector findSubstring(string S, vector &L) { vector allPos; if(L.empty()) return allPos; int totalWords = L.size(); int wordSize = L[0].size(); int totalLen = wordSize * totalWords; if(S.size()<totalLen) return allPos;

    unordered_map<string,int> wordCount;
    for(int i=0; i<totalWords; i++)
        wordCount[L[i]]++;

    for(int i=0; i<=S.size()-totalLen; i++) {
        if(checkSubstring(S, i, wordCount, wordSize, totalWords))
            allPos.push_back(i);
    }
    return allPos;
}

bool checkSubstring(string S, int start, unordered_map<string,int> &wordCount, int wordSize, int totalWords) {
    if(S.size()-start+1 < wordSize*totalWords) return false;
    unordered_map<string,int> wordFound;

    for(int i=0; i<totalWords; i++) {
        string curWord = S.substr(start+i*wordSize,wordSize);
        if(!wordCount.count(curWord)) return false;
        wordFound[curWord]++;
        if(wordFound[curWord]>wordCount[curWord]) return false;
    }
    return true;
}

};

I think the following code is self-explanatory enough. We use an unordered_map counts to record the expected times of each word and another unordered_map seen to record the times we have seen. Then we check for every possible position of i. Once we meet an unexpected word or the times of some word is larger than its expected times, we stop the check. If we finish the check successfully, push i to the result indexes.

// 792ms, 24.51% class Solution { public: vector findSubstring(string s, vector& words) { unordered_map counts; for (string word : words) counts[word]++; int n = s.length(), num = words.size(), len = words[0].length(); vector indexes; for (int i = 0; i < n - num len + 1; i++) { unordered_map seen; int j = 0; for (; j < num; j++) { string word = s.substr(i + j len, len); if (counts.find(word) != counts.end()) { // counts.count(word) > 0 seen[word]++; if (seen[word] > counts[word]) break; } else break; } if (j == num) indexes.push_back(i); } return indexes; } };

3.12 (30) Substring With Concatenation of All Words

[LeetCode 30] Substring with Concatenation of All Words

Analysis

results matching ""

No results matching ""