Coverage for postrfp/model/questionnaire/b36.py: 99%

78 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-22 21:34 +0000

1from collections import defaultdict 

2from typing import Iterable, Protocol, Optional, TYPE_CHECKING 

3 

4if TYPE_CHECKING: 

5 from postrfp.model import Section 

6 

7 

8_b36_cache: dict[int, str] = dict() 

9_number_cache: dict[str, str] = dict() 

10 

11 

12def base36encode(int_number: int): 

13 """ 

14 Converts an integer into a base36 string two characters long 

15 

16 @param int_number - an int or long between 0 and 1296 

17 """ 

18 initial_value = int_number 

19 if int_number not in _b36_cache: 

20 if not isinstance(int_number, int): 

21 raise TypeError("int_number must be an integer") 

22 if not 0 < int_number < 1296: 

23 raise ValueError( 

24 "int_number must in range 1 - 1295, received %s" % int_number 

25 ) 

26 

27 alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" 

28 base36 = "" 

29 while int_number: 

30 int_number, i = divmod(int_number, 36) 

31 base36 = alphabet[i] + base36 

32 _b36_cache[initial_value] = base36.rjust(2, "0") 

33 

34 return _b36_cache[initial_value] 

35 

36 

37def to_b36(string_number: str): 

38 """ 

39 Converts a position number (e.g. 23.1.8) into a base36 string (0N0108) 

40 """ 

41 if not string_number: 

42 return string_number 

43 return "".join(base36encode(int(el)) for el in string_number.split(".")) 

44 

45 

46def from_b36(db_string): 

47 """ 

48 Converts a base36 encoded position string into human readable form, 

49 e.g. '0N0108' -> 23.1.18. 

50 

51 Called by every row for a question or section query, and surprisingly expensive 

52 For fetch question number and id for 1,500 this function accounts for 50% of 

53 the runtime. 

54 

55 To eliminate this a cache is used to store values. 

56 """ 

57 if db_string not in _number_cache: 

58 if not db_string: 

59 return db_string 

60 _number_cache[db_string] = ".".join( 

61 "%s" % int(db_string[x : x + 2], 36) for x in range(0, len(db_string), 2) 

62 ) 

63 return _number_cache[db_string] 

64 

65 

66def visible_relatives_regex(section_b36_number: str): 

67 """ 

68 Create a regular expression for finding ancestor and ancestor sibling sections for 

69 the given section b36_number. This is useful for returning a subset of the questionnaire tree 

70 necessary for rendering a tree menu without returning the entire questionnaire. 

71 

72 """ 

73 # Build regex by processing each 2-character chunk of the section Number 

74 # starting from the parent of the section given by section_number 

75 parent_index = len(section_b36_number) 

76 n = section_b36_number 

77 # For each chunk build a regex similar to ^02\w{2}$ - i.e. find immediate offspring 

78 # for each chunk 

79 rels_regex_list = ["^" + n[0 : i + 2] + r".{2}$" for i in range(0, parent_index, 2)] 

80 

81 # The root section has no number so root + immediate children must be special-cased: 

82 rels_regex_list.append(r"^.{0,2}$") 

83 

84 return r"|".join(rels_regex_list) 

85 

86 

87class Node(Protocol): 

88 id: int 

89 parent_id: Optional[int] 

90 position: int 

91 

92 @property 

93 def base36(self) -> str: 

94 return base36encode(self.position) 

95 

96 

97def materialised_paths( 

98 nodes: Iterable[Node], initial_section: "Section" 

99) -> dict[int, str]: 

100 """ 

101 Find ancestor paths for a list of nodes with id, position, 

102 and parent_id attributes. The numbering starts from initial_section.number. 

103 

104 Returns a dictionary mapping id > path e.g. 

105 { 

106 23; "020302", // If initial_section.b36_number was "02" and node 23 is its child at pos 3, then child at pos 2 

107 25: "020303" 

108 } 

109 """ 

110 # parent_id -> [children] 

111 children = defaultdict(list) 

112 initial_node: Optional[Node] = None 

113 

114 for node_item in nodes: 

115 children[node_item.parent_id].append(node_item) 

116 if node_item.id == initial_section.id: 

117 initial_node = node_item 

118 

119 for ch_list in children.values(): 

120 ch_list.sort(key=lambda n: n.position) 

121 

122 initial_path: str = ( 

123 "" if initial_section.b36_number is None else initial_section.b36_number 

124 ) 

125 

126 paths = {} 

127 # The stack stores tuples of (node_to_process, path_of_this_node) 

128 stack = [(initial_node, initial_path)] 

129 

130 while stack: 

131 node, current_node_path = stack.pop() 

132 assert node is not None 

133 paths[node.id] = current_node_path 

134 for child_node in reversed(children.get(node.id, [])): 

135 encoded_pos = base36encode(child_node.position) 

136 child_full_path = f"{current_node_path}{encoded_pos}" 

137 stack.append((child_node, child_full_path)) 

138 

139 return paths 

140 

141 

142def parse_path(path: str) -> list[int]: 

143 return [int(path[i : i + 2], 36) for i in range(0, len(path), 2)] 

144 

145 

146def nearest_common_ancestor(b36_paths: list[str]) -> str: 

147 """ 

148 Given a list of (raw) position numbers, find the nearest common ancestor. 

149 If only one path is provided, return the first parent of that path, if it exists. 

150 If the paths provided have only one b36 element, the nearest common ancestor is the root (''). 

151 """ 

152 if len(b36_paths) == 1: 

153 path = parse_path(b36_paths[0]) 

154 if len(path) > 1: 

155 # Remove the last element to get the parent path 

156 parent_path = path[:-1] 

157 return "".join(base36encode(x) for x in parent_path) 

158 return "" # Return the root if no parent exists 

159 

160 split_paths = [parse_path(p) for p in b36_paths] 

161 

162 common = [] 

163 for levels in zip(*split_paths): 

164 if all(level == levels[0] for level in levels): 

165 common.append(levels[0]) 

166 else: 

167 break 

168 

169 return "".join(base36encode(x) for x in common)