Coverage for postrfp/model/ref.py: 98%

149 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-22 21:34 +0000

1from typing import Optional, Any, TYPE_CHECKING 

2from datetime import datetime 

3from enum import Enum 

4 

5from sqlalchemy import ( 

6 Column, 

7 Table, 

8 Integer, 

9 ForeignKey, 

10 UniqueConstraint, 

11 Index, 

12 func, 

13 event, 

14) 

15from sqlalchemy.orm import Mapped, mapped_column, relationship, validates 

16from sqlalchemy.types import VARCHAR, DateTime, Enum as SqlaEnum, JSON, TEXT 

17import jsonschema_rs 

18 

19from postrfp.model.meta import Base 

20from postrfp.model.humans import Organisation, User 

21from postrfp.model.tags import Tag 

22from postrfp.shared.fsm_entity import FSMEntity 

23from postrfp.shared.constants import CEL_EXPRESSION_MAX_LENGTH 

24 

25if TYPE_CHECKING: 

26 from postrfp.model.questionnaire.qelements import QElement 

27 

28 

class ContentRelationshipType(Enum):
    """Kinds of relationship that can link one content item to another.

    ContentRelationship persists the string *value* of each member (see its
    ``values_callable``), so the values below are part of the stored data.
    """

    PARENT_CHILD = "parent_child"  # Hierarchical relationship
    REFERENCE = "reference"  # Content refers to another content
    SUPERSEDES = "supersedes"  # Content replaces an older version
    RELATED = "related"  # Generic related content
    DERIVED = "derived"  # Content derived from another content

37 

38 

# Association table for the many-to-many link between Content and Tag.
# Both foreign keys use ON DELETE CASCADE, so the database removes link rows
# when either the content item or the tag is deleted.
content_tags_table = Table(
    "ref_content_tags",
    Base.metadata,
    Column(
        "content_id",
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        index=True,
    ),
    Column("tag_id", Integer, ForeignKey("tags.id", ondelete="CASCADE"), index=True),
    # A given tag can be attached to a given content item at most once.
    UniqueConstraint("content_id", "tag_id"),
)

52 

# Association table for the many-to-many link between Content and Subject.
# Both foreign keys use ON DELETE CASCADE, so the database removes link rows
# when either the content item or the subject is deleted.
content_subjects_table = Table(
    "ref_content_subjects",
    Base.metadata,
    Column(
        "content_id",
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        index=True,
    ),
    Column(
        "subject_id",
        Integer,
        ForeignKey("ref_subjects.id", ondelete="CASCADE"),
        index=True,
    ),
    # A given subject can be linked to a given content item at most once.
    UniqueConstraint("content_id", "subject_id"),
)

71 

72 

class DateStamped:
    """Mixin adding creation and last-update timestamp columns.

    Both columns are filled by the database's ``utc_timestamp()`` function
    when a row is inserted.
    """

    # When this row was created
    date_created: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.utc_timestamp(),
    )

    # When this row was last updated
    date_updated: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.utc_timestamp(),
        # ``server_onupdate`` on its own only tells SQLAlchemy that the server
        # generates a new value on UPDATE; it does not emit any DDL or SQL that
        # would actually do so. ``onupdate`` makes SQLAlchemy include
        # ``utc_timestamp()`` in every UPDATE that does not set the column
        # explicitly, so the timestamp really is refreshed.
        onupdate=func.utc_timestamp(),
        server_onupdate=func.utc_timestamp(),
    )

87 

88 

class PolicyMixin:
    """Mixin adding an optional authorization-policy column to a model."""

    # Optional CEL expression, at most CEL_EXPRESSION_MAX_LENGTH characters.
    # How and where the expression is evaluated is not defined in this module.
    auth_policy: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=CEL_EXPRESSION_MAX_LENGTH),
        nullable=True,
        comment="CEL expression for authorization rules",
    )

95 

96 

class ContentSpec(Base, FSMEntity, DateStamped, PolicyMixin):
    """A named JSON Schema document owned by an organisation.

    Content rows reference a ContentSpec through ``Content.schema``; the schema
    held in ``spec_doc`` is what their ``content_doc`` is validated against.
    """

    __tablename__ = "ref_content_specs"

    name: Mapped[str] = mapped_column(VARCHAR(length=128), nullable=False)
    description: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=256), nullable=True
    )
    # Checked by validate_spec() below whenever the attribute is assigned.
    spec_doc: Mapped[dict[str, Any]] = mapped_column(
        JSON,
        nullable=False,
        comment="JSON Schema document to validate associated Content",
    )

    # Organisation that created this schema
    org_id: Mapped[str] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )
    organisation: Mapped[Organisation] = relationship(
        Organisation, primaryjoin=org_id == Organisation.id
    )

    # Content items that follow this schema (inverse of Content.schema)
    contents: Mapped[list["Content"]] = relationship("Content", back_populates="schema")

    # Content maps defined for this spec; deleted together with the spec
    content_maps: Mapped[list["ContentSpecMap"]] = relationship(
        "ContentSpecMap", back_populates="content_spec", cascade="all, delete-orphan"
    )

    @validates("spec_doc")
    def validate_spec(self, _key, schema_dict: dict) -> dict:
        """
        Check that the provided schema is a valid 2020-12 JSON Schema document.

        Builds a ``jsonschema_rs.Draft202012Validator`` from ``schema_dict`` and
        discards it; only the construction matters. Any exception raised by
        that constructor propagates to the caller.

        Returns ``schema_dict`` unchanged so it is stored on the attribute.
        """
        jsonschema_rs.Draft202012Validator(schema_dict)
        return schema_dict

    def jsonschema_validate(self, content_data) -> None:
        """
        Validate ``content_data`` against this spec's ``spec_doc`` schema.

        Delegates to ``jsonschema_rs.validate``; any exception it raises
        propagates to the caller. Returns None.
        """
        jsonschema_rs.validate(self.spec_doc, content_data)

    def __repr__(self) -> str:
        return f"<Schema: {self.name}>"

143 

144 

class Content(Base, FSMEntity, DateStamped, PolicyMixin):
    """A titled JSON document associated with a ContentSpec.

    Besides the document itself, a Content row records its author and editor
    organisations, the user who last updated it, its tags and subjects, its
    relationships to other content and its revision history.
    """

    __tablename__ = "ref_contents"
    __table_args__ = (
        Index("ix_content_fts", "content_fts", mariadb_prefix="FULLTEXT"),
    )

    title: Mapped[str] = mapped_column(VARCHAR(length=256), nullable=False)
    content_doc: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=False)
    # Flattened text of content_doc for the FULLTEXT index; filled in by the
    # before_insert / before_update listeners at the bottom of this module.
    content_fts: Mapped[Optional[str]] = mapped_column(TEXT, nullable=True)

    # Who authored this content
    author_org_id: Mapped[str] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )
    author_organisation: Mapped[Organisation] = relationship(
        Organisation, primaryjoin=author_org_id == Organisation.id
    )

    # Who last updated this content. The column is nullable (and set to NULL
    # when the user is deleted), so the Python-side type is Optional.
    last_updated_by_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("users.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
    )
    last_updated_by: Mapped[Optional[User]] = relationship(
        User, primaryjoin=last_updated_by_id == User.id
    )

    # Schema this content follows
    schema_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("ref_content_specs.id", ondelete="CASCADE"),
        nullable=False,
    )
    schema: Mapped[ContentSpec] = relationship(ContentSpec, back_populates="contents")

    # Tags for this content
    tags = relationship(
        Tag,
        secondary=content_tags_table,
        passive_deletes=True,
        lazy="dynamic",
    )

    # Relationships with other content. The two collections share one target
    # class, so each names the foreign key it is built on.
    outgoing_relationships: Mapped[list["ContentRelationship"]] = relationship(
        "ContentRelationship",
        foreign_keys="ContentRelationship.source_content_id",
        back_populates="source_content",
        cascade="all, delete-orphan",
    )

    incoming_relationships: Mapped[list["ContentRelationship"]] = relationship(
        "ContentRelationship",
        foreign_keys="ContentRelationship.target_content_id",
        back_populates="target_content",
        cascade="all, delete-orphan",
    )

    # Organization that has edit permission (if different from author org)
    editor_org_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,  # Null means only the author org can edit
    )
    editor_organisation: Mapped[Optional[Organisation]] = relationship(
        Organisation,
        foreign_keys=[editor_org_id],
        primaryjoin="Content.editor_org_id == Organisation.id",
    )

    # For single primary subject
    primary_subject_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("ref_subjects.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Paired with Subject.primary_for_contents, which already names this
    # attribute in its own back_populates; declaring it here as well keeps the
    # two sides in sync in memory in both directions.
    primary_subject: Mapped[Optional["Subject"]] = relationship(
        "Subject",
        foreign_keys=[primary_subject_id],
        back_populates="primary_for_contents",
    )

    # For multiple related subjects
    subjects = relationship(
        "Subject",
        secondary=content_subjects_table,
        passive_deletes=True,
        lazy="dynamic",
    )

    # Revision history; revisions are deleted together with the content
    revisions: Mapped[list["ContentRevision"]] = relationship(
        "ContentRevision",
        back_populates="content",
        cascade="all, delete-orphan",
    )

    # Helper methods for working with relationships
    def add_relationship(
        self,
        target_content: "Content",
        relationship_type: ContentRelationshipType,
        relationship_metadata: Optional[dict[str, Any]] = None,
    ) -> "ContentRelationship":
        """Create a relationship from this content to another content.

        Parameters
        ----------
        target_content : Content
            The target Content instance this content relates to.
        relationship_type : ContentRelationshipType
            Enum value describing the relationship.
        relationship_metadata : Optional[dict]
            Arbitrary JSON-serialisable metadata about the relationship. Stored
            in the ``relationship_metadata`` column; ``None`` (or any other
            falsy value) is stored as an empty dict.

        Returns
        -------
        ContentRelationship
            The new relationship, already appended to
            ``self.outgoing_relationships``.
        """
        link = ContentRelationship(
            relationship_type=relationship_type,
            relationship_metadata=relationship_metadata or {},
        )
        # Two-phase setup, in this order on purpose: appending to the
        # collection first sets ``link.source_content`` through back_populates
        # and lets the save-update cascade attach the new object alongside
        # this one. Only then is the target side assigned; per the original
        # author's note, doing it the other way round produced an SAWarning
        # about the object not being in the session.
        self.outgoing_relationships.append(link)
        link.target_content = target_content
        return link

    def jsonschema_validate(self, content_data) -> None:
        """
        Validate ``content_data`` against the "spec_doc" JSON Schema document
        of the associated ContentSpec.

        Note that the data to check is passed in explicitly; this method does
        not read ``self.content_doc``.

        Raises ValueError if no ContentSpec is associated with this instance;
        any exception raised by ``jsonschema_rs.validate`` propagates.
        """
        if self.schema is None:
            raise ValueError(
                "schema_id must be set and the instance flushed before validating"
            )
        jsonschema_rs.validate(self.schema.spec_doc, content_data)

    def __repr__(self) -> str:
        return f"<Content # {self.id}: {self.title}>"

295 

296 

class ContentRelationship(Base):
    """Model representing a typed, directed relationship between two content items.

    Each row points from ``source_content`` to ``target_content``. Both foreign
    keys use ON DELETE CASCADE, so the row is removed by the database when
    either content item is deleted.
    """

    __tablename__ = "ref_content_relationships"

    id: Mapped[int] = mapped_column(primary_key=True)
    source_content_id: Mapped[int] = mapped_column(
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
    )
    target_content_id: Mapped[int] = mapped_column(
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
    )
    # values_callable makes the database enum use the members' string values
    # ("parent_child", ...) rather than their Python names.
    relationship_type: Mapped[ContentRelationshipType] = mapped_column(
        SqlaEnum(
            ContentRelationshipType,
            name="content_relationship_type_enum",
            values_callable=lambda obj: [e.value for e in obj],
        ),
        nullable=False,
    )
    # Renamed from ``metadata`` to ``relationship_metadata`` — presumably
    # because ``metadata`` is reserved on declarative classes; confirm.
    relationship_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSON, nullable=True
    )

    # Relationships to the content objects; each is the inverse of the
    # matching collection on Content.
    source_content: Mapped["Content"] = relationship(
        "Content",
        foreign_keys=[source_content_id],
        back_populates="outgoing_relationships",
    )
    target_content: Mapped["Content"] = relationship(
        "Content",
        foreign_keys=[target_content_id],
        back_populates="incoming_relationships",
    )

335 

336 

class SubjectType(Enum):
    """Types of subjects that content can be about.

    Subject persists the string *value* of each member (see its
    ``values_callable``), so the values below are part of the stored data.
    """

    COUNTRY = "country"  # National entity
    REGION = "region"  # Geographic region (continent, economic zone, etc.)
    ORGANIZATION = "organization"  # Company, bank, institution
    MARKET = "market"  # Stock exchange, trading venue
    SECTOR = "sector"  # Industry sector or segment
    PRODUCT = "product"  # Specific product or service
    OTHER = "other"  # For any other subject type

347 

348 

class Subject(Base, PolicyMixin):
    """Represents entities (countries, organizations, etc.) that content can be about"""

    __tablename__ = "ref_subjects"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(VARCHAR(length=128), nullable=False, index=True)
    code: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=32), nullable=True, index=True
    )
    description: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=512), nullable=True
    )

    # values_callable makes the database enum use the members' string values
    # ("country", ...) rather than their Python names.
    subject_type: Mapped[SubjectType] = mapped_column(
        SqlaEnum(
            SubjectType,
            name="subject_type_enum",
            values_callable=lambda obj: [e.value for e in obj],
        ),
        nullable=False,
        index=True,
    )

    # For hierarchical relationships (e.g., city -> state -> country)
    parent_id: Mapped[Optional[int]] = mapped_column(
        Integer, ForeignKey("ref_subjects.id", ondelete="SET NULL"), nullable=True
    )
    # Self-referential many-to-one; the backref exposes ``children`` on the parent.
    parent: Mapped[Optional["Subject"]] = relationship(
        "Subject", remote_side="Subject.id", backref="children"
    )

    # Org that manages this subject (optional, for custom subjects)
    managing_org_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
    )
    managing_organization: Mapped[Optional[Organisation]] = relationship(Organisation)

    # Metadata for the subject (can store country codes, market identifiers, etc.)
    subject_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSON, nullable=True
    )

    # Content that has this as primary subject
    primary_for_contents: Mapped[list[Content]] = relationship(
        Content,
        back_populates="primary_subject",
        foreign_keys=[Content.primary_subject_id],
    )

    # Content that references this subject through the association table.
    # Read-only: links are written from the Content.subjects side.
    referenced_by_contents = relationship(
        Content,
        secondary=content_subjects_table,
        viewonly=True,
        lazy="dynamic",
    )

    def __repr__(self) -> str:
        # On a transient instance subject_type may still be None (or a raw
        # string); fall back to showing it as-is instead of raising from
        # __repr__. For a SubjectType member the output is unchanged.
        kind = getattr(self.subject_type, "value", self.subject_type)
        return f"<Subject: {self.name} ({kind})>"

411 

412 

class ContentRevision(Base):
    """Model for tracking Content changes via JSON Patch.

    Each row records one change to a Content item: the patch operations that
    were applied, who applied them and when, a hash of the resulting state and
    a version number.
    """

    __tablename__ = "ref_content_revisions"

    id: Mapped[int] = mapped_column(primary_key=True)
    entity_type: Mapped[str] = mapped_column(
        VARCHAR(length=32), nullable=False, index=True
    )
    content_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # When the revision was recorded; filled by the database on insert
    timestamp: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.utc_timestamp(), index=True
    )

    # User who made the change. The column is nullable (and set to NULL when
    # the user is deleted), so the Python-side type is Optional.
    user_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("users.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
        index=True,
    )

    # The JSON Patch operations that were applied
    patch_operations: Mapped[list[dict[str, Any]]] = mapped_column(JSON, nullable=False)

    # Optional comment describing the change
    comment: Mapped[Optional[str]] = mapped_column(VARCHAR(length=512), nullable=True)

    # A hash of the entity state after this patch was applied
    # Enables quick determination if two versions are identical
    snapshot_hash: Mapped[str] = mapped_column(VARCHAR(length=64), nullable=False)

    # For version management - incremented for each change to the same entity
    version_number: Mapped[int] = mapped_column(Integer, nullable=False)

    # The content item this revision belongs to (inverse of Content.revisions)
    content: Mapped["Content"] = relationship("Content", back_populates="revisions")

    # Relationship to the user
    user: Mapped[Optional[User]] = relationship(User)

458 

459 

class ContentSpecMap(Base):
    """
    A mapping between Question Elements (QElement) and Content fields.

    This defines how answers from questionnaire responses can be used to populate
    structured content. Each mapping specifies which question elements correspond
    to which fields in the content's JSON document via JSON Pointer expressions.
    The individual element-to-field links are held in ``pairs``.

    Example:
        A vendor selection RFP might have a question "What is your SLA uptime guarantee?"
        This mapping would connect that question element to the "sla.uptime" field
        in an SLA content document.
    """

    __tablename__ = "ref_content_maps"

    name: Mapped[str] = mapped_column(VARCHAR(length=256), nullable=False)
    description: Mapped[str] = mapped_column(VARCHAR(length=1024), nullable=False)
    content_spec_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("ref_content_specs.id", ondelete="CASCADE"), nullable=False
    )

    # Relationships
    content_spec: Mapped[ContentSpec] = relationship(
        ContentSpec, back_populates="content_maps"
    )
    # The element/field pairs of this map; deleted together with the map
    pairs: Mapped[list["ContentQElementPair"]] = relationship(
        "ContentQElementPair",
        back_populates="content_map",
        cascade="all, delete-orphan",
    )

    # Read-only shortcut to the QElements reachable through the pairs table
    question_elements = relationship(
        "QElement", secondary="ref_content_qelements", viewonly=True, lazy="dynamic"
    )

    def __repr__(self) -> str:
        return f"<ContentSpecMap ID {self.id}, name: {self.name}>"

499 

500 

class ContentQElementPair(Base):
    """
    A mapping between a QElement and a Content field via JSON Pointer.

    This represents the actual connection between a question element and where
    its answer should be stored in the content document. The content_reference
    field contains a JSON Pointer expression that identifies the location in the
    content's JSON document.

    Examples of content_reference values:
    - "$.sla.uptime" -> content_doc["sla"]["uptime"]
    - "$.contact.email" -> content_doc["contact"]["email"]
    - "$.features[0].name" -> content_doc["features"][0]["name"]

    NOTE(review): the examples above use JSONPath-style syntax ("$.a.b"),
    whereas an RFC 6901 JSON Pointer for the first one would be "/sla/uptime".
    Confirm which syntax the code that resolves content_reference expects and
    align the wording or the examples accordingly.
    """

    __tablename__ = "ref_content_qelements"

    content_map_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("ref_content_maps.id", ondelete="CASCADE"), nullable=False
    )

    question_element_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("question_elements.id", ondelete="CASCADE"), nullable=False
    )
    # NOTE(review): the column comment below says "json_doc", but the JSON
    # column on Content is named content_doc. The string is left untouched
    # here because it is part of the table definition.
    content_reference: Mapped[str] = mapped_column(
        VARCHAR(length=256),
        nullable=False,
        comment="A JSON Pointer expression to resolve a value in the Content item's json_doc",
    )

    # Relationships
    content_map: Mapped[ContentSpecMap] = relationship(
        ContentSpecMap, back_populates="pairs"
    )
    question_element: Mapped["QElement"] = relationship("QElement")

    def __repr__(self) -> str:
        return f"<ContentQElementPair ID {self.id}, qel_id: {self.question_element_id}, ref: {self.content_reference}>"

539 

540 

def extract_text_from_json(data: Any) -> str:
    """Collect every string value in a JSON-like structure into one string.

    Dicts are searched through their values (keys are not included) and lists
    through their items, depth-first and in iteration order. Values of any
    other non-string type are skipped. The strings found are joined with single
    spaces; the result is "" when there are none.
    """

    def _strings(node: Any):
        # Yield the string leaves below ``node`` in depth-first order.
        if isinstance(node, dict):
            for child in node.values():
                yield from _strings(child)
        elif isinstance(node, list):
            for child in node:
                yield from _strings(child)
        elif isinstance(node, str):
            yield node

    return " ".join(_strings(data))

558 

559 

@event.listens_for(Content, "before_insert")
def update_content_fts_before_insert(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Fill ``content_fts`` from ``content_doc`` just before a Content row is inserted."""
    searchable_text = extract_text_from_json(target.content_doc)
    target.content_fts = searchable_text

565 

566 

@event.listens_for(Content, "before_update")
def update_content_fts_before_update(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Refresh ``content_fts`` from ``content_doc`` just before a Content row is updated."""
    searchable_text = extract_text_from_json(target.content_doc)
    target.content_fts = searchable_text

572 

573 

574__all__ = [ 

575 "Content", 

576 "ContentSpec", 

577 "ContentRelationship", 

578 "ContentRelationshipType", 

579 "Subject", 

580 "SubjectType", 

581 "ContentRevision", 

582 "ContentSpecMap", 

583 "ContentQElementPair", 

584]