单词矩阵

标签: 字典树 数组 字符串 回溯

难度: Hard

给定一份单词的清单,设计一个算法,创建由字母组成的面积最大的矩形,其中每一行组成一个单词(自左向右),每一列也组成一个单词(自上而下)。不要求这些单词在清单里连续出现,但要求所有行等长,所有列等高。

如果有多个面积最大的矩形,输出任意一个均可。一个单词可以重复使用。

示例 1:

输入: ["this", "real", "hard", "trh", "hea", "iar", "sld"]
输出:
[
   "this",
   "real",
   "hard"
]

示例 2:

输入: ["aa"]
输出: ["aa","aa"]

说明:

  • words.length <= 1000
  • words[i].length <= 100
  • 数据保证单词足够随机

Submission

运行时间: 528 ms

内存: 17.1 MB

class Solution:
    def __init__(self):
        self.words = []

        self.word_map = dict()
        self.word_all = dict()
        self.word_len_max = 2  # 记录最大单词长度,便于后续遍历

        self.ans_rect = None
        self.ans_area = 0  # 记录当前算出的最大面积

    def maxRectangle(self, words: [str]) -> [str]:
        self.words = words
        self.add_word_to_map()
        self.check_up_edge()

        return self.ans_rect

    # 数据初始化
    def add_word_to_map(self):
        for word_pos, word in enumerate(self.words):
            word_length = len(self.words[word_pos])
            word_head = self.words[word_pos][:1]
            word_end = self.words[word_pos][-1:]

            if word_length not in self.word_map.keys():
                self.word_map[word_length] = dict()
            if word_head not in self.word_map[word_length].keys():
                self.word_map[word_length][word_head] = dict()
            if word_end not in self.word_map[word_length][word_head].keys():
                self.word_map[word_length][word_head][word_end] = []
            self.word_map[word_length][word_head][word_end].append(word_pos)

            self.word_all[word] = word_pos
            if word_length > self.word_len_max:
                self.word_len_max = word_length

    # 选取上边并准备进行检查
    def check_up_edge(self):
        for up_edge_len in range(self.word_len_max, 1, -1):
            # 剪枝,如果上边长乘以最长边都得不到更大的面积,那就没必要算下去了
            if up_edge_len * self.word_len_max <= self.ans_area:
                break
            if up_edge_len in self.word_map:
                for up_edge_head in self.word_map[up_edge_len]:
                    for up_edge_end in self.word_map[up_edge_len][up_edge_head]:
                        for up_edge_word_pos in self.word_map[up_edge_len][up_edge_head][up_edge_end]:
                            # up_edge_word_pos 为上边候选,接下来选取左边候选
                            self.check_left_edge(self.words[up_edge_word_pos])

    # 根据上边,选取左边并准备进行检查
    def check_left_edge(self, up_edge_word):
        for left_edge_len in range(self.word_len_max, 1, -1):
            # 剪枝,上边与左边决定了当前矩形的面积
            if len(up_edge_word) * left_edge_len <= self.ans_area:
                break
            if left_edge_len in self.word_map:
                # 左首 等于 上首
                left_edge_head = up_edge_word[:1]
                if left_edge_head in self.word_map[left_edge_len]:
                    for left_edge_end in self.word_map[left_edge_len][left_edge_head]:
                        for left_edge_word_pos in self.word_map[left_edge_len][left_edge_head][left_edge_end]:
                            self.check_right_edge(up_edge_word, self.words[left_edge_word_pos])

    # 根据上边、左边,选取右边并准备进行检查
    def check_right_edge(self, up_edge_word, left_edge_word):
        # 右长 等于 左长,右首 等于 上尾
        right_edge_len = len(left_edge_word)
        right_edge_head = up_edge_word[-1:]

        if right_edge_head in self.word_map[right_edge_len]:
            for right_edge_end in self.word_map[right_edge_len][right_edge_head]:
                for right_edge_word_pos in self.word_map[right_edge_len][right_edge_head][right_edge_end]:
                    self.check_down_edge(up_edge_word, left_edge_word, self.words[right_edge_word_pos])

    # 根据上边、左边、右边,选取下边并准备进行检查
    def check_down_edge(self, up_edge_word, left_edge_word, right_edge_word):
        # 下长 等于 上长,下首 等于 左尾,下尾 等于 右尾
        down_edge_len = len(up_edge_word)
        down_edge_head = left_edge_word[-1:]
        down_edge_end = right_edge_word[-1:]
        if down_edge_head in self.word_map[down_edge_len]:
            if down_edge_end in self.word_map[down_edge_len][down_edge_head]:
                for down_edge_word_pos in self.word_map[down_edge_len][down_edge_head][down_edge_end]:
                    self.check_frame(up_edge_word, left_edge_word, right_edge_word, self.words[down_edge_word_pos])

    # 对构造出的框架进行检查,先粗筛,后细查
    def check_frame(self, up_edge_word, left_edge_word, right_edge_word, down_edge_word):
        # 先做快速检查,同时得到一个包含了全部可能性的数组
        demo_box = self.quick_check(up_edge_word, left_edge_word, right_edge_word, down_edge_word)
        if not demo_box:
            return

        # 接下来做完全检查
        check_ans = self.full_check(demo_box)
        if check_ans:
            self.ans_rect = check_ans
            self.ans_area = len(check_ans) * len(check_ans[0])

    # 快速检查,先粗略的检查框架是否稳固(是否存在符合框架纵横相应长度和首尾的字符串)
    def quick_check(self, up_edge_word, left_edge_word, right_edge_word, down_edge_word):
        # 先查纵列,因为纵列暂时不存
        for col in range(1, len(up_edge_word) - 1):
            if not self.find_str_by(len(left_edge_word), up_edge_word[col:col + 1], down_edge_word[col:col + 1]):
                return None

        # 再查横排,横排顺手存下来(注意每个位置存的都是字符串数组,代表了所有可能)
        demo_box = [[up_edge_word]]
        for row in range(1, len(left_edge_word) - 1):
            find_res = self.find_str_by(len(up_edge_word), left_edge_word[row:row + 1], right_edge_word[row:row + 1])
            if not find_res:
                return None
            else:
                demo_box.append(find_res)
        # 别忘了加上最后一行
        demo_box.append([down_edge_word])
        return demo_box

    # 根据长度和首尾字符,返回匹配字符串数组
    def find_str_by(self, str_len, str_head, str_end):
        if str_len in self.word_map:
            if str_head in self.word_map[str_len]:
                if str_end in self.word_map[str_len][str_head]:
                    return list(map(lambda x: self.words[x], self.word_map[str_len][str_head][str_end]))
        return None

    # 生成并检查全部存在可能的矩形
    def full_check(self, demo_box, line=0):
        # 矩形生成完毕,就拿去检查一下
        if line >= len(demo_box):
            word_box = list(map(lambda x: x[0], demo_box))
            if self.box_check(word_box):
                return word_box
            else:
                return None

        # 因为存在多种可能性,此处递归一下,将每一行处理成只有一种情况
        for str_canbe in demo_box[line]:
            box_copy = copy.deepcopy(demo_box)
            box_copy[line] = [str_canbe]
            check_res = self.full_check(box_copy, line + 1)
            if check_res:
                return check_res
        return None

    # 检查具体的一个个矩形
    def box_check(self, word_box):
        # 四边在生成的时候已经检查过了
        # 横向直接是使用单词生成的
        # 所以只需要检查纵向部分就好
        for col in range(1, len(word_box[0]) - 2):
            check_word = ""
            for row in range(0, len(word_box)):
                check_word += word_box[row][col]
            if check_word not in self.word_all:
                return False
        return True

Explain

该题解采用了递归和回溯的方式来尝试构造符合条件的矩形。首先通过单词初始化将所有的单词按照长度、首字母和尾字母分类存储。接着从最长的单词开始尝试作为矩形的上边,并从单词的头尾字母出发向下递归,以此构造出可能的左边、右边和下边。构建过程中采用了剪枝策略,如果当前正在构建的矩形面积已经小于已知的最大矩形面积,则停止当前分支的进一步搜索。每次成功构造出一个矩形后,检查该矩形的所有行和列是否都是有效单词,并更新最大面积和最佳矩形结果。

时间复杂度: O(n^L)(理论最坏情况)

空间复杂度: O(n)


# Definition of Solution class which handles the rectangle creation problem
class Solution:
    def __init__(self):
        self.words = []

        self.word_map = dict()  # Stores words grouped by length, start and end character
        self.word_all = dict()  # Dictionary to check the existence of a vertical word directly
        self.word_len_max = 2  # Tracks the maximum length of words encountered

        self.ans_rect = None  # Stores the largest found rectangle
        self.ans_area = 0  # Tracks the area of the largest found rectangle

    def maxRectangle(self, words: [str]) -> [str]:
        self.words = words
        self.add_word_to_map()  # Organize words in maps
        self.check_up_edge()  # Begin attempting to form rectangles from potential top edges

        return self.ans_rect

    def add_word_to_map(self):
        # Initialize word mappings
        for word_pos, word in enumerate(self.words):
            word_length = len(self.words[word_pos])
            word_head = self.words[word_pos][:1]  # First character of word
            word_end = self.words[word_pos][-1:]  # Last character of word

            if word_length not in self.word_map.keys():
                self.word_map[word_length] = dict()
            if word_head not in self.word_map[word_length].keys():
                self.word_map[word_length][word_head] = dict()
            if word_end not in self.word_map[word_length][word_head].keys():
                self.word_map[word_length][word_head][word_end] = []
            self.word_map[word_length][word_head][word_end].append(word_pos)

            self.word_all[word] = word_pos
            if word_length > self.word_len_max:
                self.word_len_max = word_length

    def check_up_edge(self):
        # Begin with potential top edges of the largest size and work downwards
        for up_edge_len in range(self.word_len_max, 1, -1):
            if up_edge_len * self.word_len_max <= self.ans_area:
                break
            if up_edge_len in self.word_map:
                for up_edge_head in self.word_map[up_edge_len]:
                    for up_edge_end in self.word_map[up_edge_len][up_edge_head]:
                        for up_edge_word_pos in self.word_map[up_edge_len][up_edge_head][up_edge_end]:
                            self.check_left_edge(self.words[up_edge_word_pos])  # Attempt to form a left edge

    def check_left_edge(self, up_edge_word):
        # For each potential top edge, try to form a valid left edge
        for left_edge_len in range(self.word_len_max, 1, -1):
            if len(up_edge_word) * left_edge_len <= self.ans_area:
                break
            if left_edge_len in self.word_map:
                left_edge_head = up_edge_word[:1]  # The first character must match the first of the top edge
                if left_edge_head in self.word_map[left_edge_len]:
                    for left_edge_end in self.word_map[left_edge_len][left_edge_head]:
                        for left_edge_word_pos in self.word_map[left_edge_len][left_edge_head][left_edge_end]:
                            self.check_right_edge(up_edge_word, self.words[left_edge_word_pos])  # Check if a valid right edge can be formed

    def check_right_edge(self, up_edge_word, left_edge_word):
        # Matching a right edge with the same length as the left and starting with the last character of the top edge
        right_edge_len = len(left_edge_word)
        right_edge_head = up_edge_word[-1:]

        if right_edge_head in self.word_map[right_edge_len]:
            for right_edge_end in self.word_map[right_edge_len][right_edge_head]:
                for right_edge_word_pos in self.word_map[right_edge_len][right_edge_head][right_edge_end]:
                    self.check_down_edge(up_edge_word, left_edge_word, self.words[right_edge_word_pos])  # Finally, attempt to find a valid bottom edge

    def check_down_edge(self, up_edge_word, left_edge_word, right_edge_word):
        # Match the bottom edge starting with the last character of the left edge and ending with the last of the right
        down_edge_len = len(up_edge_word)
        down_edge_head = left_edge_word[-1:]
        down_edge_end = right_edge_word[-1:]
        if down_edge_head in self.word_map[down_edge_len]:
            if down_edge_end in self.word_map[down_edge_len][down_edge_head]:
                for down_edge_word_pos in self.word_map[down_edge_len][down_edge_head][down_edge_end]:
                    self.check_frame(up_edge_word, left_edge_word, right_edge_word, self.words[down_edge_word_pos])  # Assess if the completed frame can form a valid rectangle

    def check_frame(self, up_edge_word, left_edge_word, right_edge_word, down_edge_word):
        # Fast initial check if the rectangle holds potential before verifying each possible configuration
        demo_box = self.quick_check(up_edge_word, left_edge_word, right_edge_word, down_edge_word)
        if not demo_box:
            return

        check_ans = self.full_check(demo_box)  # Detailed verification of each configuration to finalize the best rectangle
        if check_ans:
            self.ans_rect = check_ans
            self.ans_area = len(check_ans) * len(check_ans[0])

    def quick_check(self, up_edge_word, left_edge_word, right_edge_word, down_edge_word):
        # Preliminary check for column validity before finalizing the row configurations
        for col in range(1, len(up_edge_word) - 1):
            if not self.find_str_by(len(left_edge_word), up_edge_word[col:col + 1], down_edge_word[col:col + 1]):
                return None

        demo_box = [[up_edge_word]]
        for row in range(1, len(left_edge_word) - 1):
            find_res = self.find_str_by(len(up_edge_word), left_edge_word[row:row + 1], right_edge_word[row:row + 1])
            if not find_res:
                return None
            else:
                demo_box.append(find_res)
        demo_box.append([down_edge_word])
        return demo_box

    def find_str_by(self, str_len, str_head, str_end):
        # Retrieve a list of words based on specific length and start/end character
        if str_len in self.word_map:
            if str_head in self.word_map[str_len]:
                if str_end in self.word_map[str_len][str_head]:
                    return list(map(lambda x: self.words[x], self.word_map[str_len][str_head][str_end]))
        return None

    def full_check(self, demo_box, line=0):
        # Recursive verification of every possible row configuration to finalize the rectangle
        if line >= len(demo_box):
            word_box = list(map(lambda x: x[0], demo_box))
            if self.box_check(word_box):
                return word_box
            else:
                return None

        for str_canbe in demo_box[line]:
            box_copy = copy.deepcopy(demo_box)
            box_copy[line] = [str_canbe]
            check_res = self.full_check(box_copy, line + 1)
            if check_res:
                return check_res
        return None

    def box_check(self, word_box):
        # Final check for vertical word validity in the formed rectangle
        for col in range(1, len(word_box[0]) - 2):
            check_word = ""
            for row in range(0, len(word_box)):
                check_word += word_box[row][col]
            if check_word not in self.word_all:
                return False
        return True


Explore

The provided '问题列表' contains an unclear or incomplete question with the content 'message'. To provide a relevant explanation or answer, more context or a specific question regarding the algorithm, its logic, boundary details, or characteristics of the data structures used in the '题解' is required. Please specify the question or the aspect of the algorithm or data structure you would like to understand better.