Coverage for postrfp / model / ref.py: 98%

182 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-03 01:35 +0000

1from typing import Optional, Any, TYPE_CHECKING 

2from datetime import datetime 

3from enum import Enum 

4 

5from sqlalchemy import ( 

6 Column, 

7 Table, 

8 Integer, 

9 ForeignKey, 

10 UniqueConstraint, 

11 Index, 

12 func, 

13 event, 

14 text, 

15 false, 

16 true, 

17) 

18from sqlalchemy.orm import ( 

19 Mapped, 

20 mapped_column, 

21 relationship, 

22 validates, 

23) 

24from sqlalchemy.types import VARCHAR, DateTime, Enum as SqlaEnum, JSON, TEXT, BOOLEAN 

25import jsonschema_rs 

26 

27from postrfp.model.meta import Base 

28from postrfp.model.humans import Organisation, User 

29from postrfp.model.tags import Tag 

30from postrfp.shared.fsm_entity import FSMEntity 

31from postrfp.shared.constants import CEL_EXPRESSION_MAX_LENGTH 

32 

33if TYPE_CHECKING: 

34 from postrfp.model.questionnaire.qelements import QElement 

35 

36 

class ContentRelationshipType(Enum):
    """Kinds of relationship that can link one content item to another."""

    # Hierarchical relationship
    PARENT_CHILD = "parent_child"
    # Content refers to another content
    REFERENCE = "reference"
    # Content replaces an older version
    SUPERSEDES = "supersedes"
    # Generic related content
    RELATED = "related"
    # Content derived from another content
    DERIVED = "derived"

45 

46 

# Association table for the many-to-many link Content <-> Tag.
# Rows are removed by the database (ON DELETE CASCADE) when either side is
# deleted, and a (content_id, tag_id) pair may appear only once.
content_tags_table = Table(
    "ref_content_tags",
    Base.metadata,
    Column(
        "content_id",
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        index=True,
    ),
    Column(
        "tag_id",
        Integer,
        ForeignKey("tags.id", ondelete="CASCADE"),
        index=True,
    ),
    UniqueConstraint("content_id", "tag_id"),
)

60 

# Association table for the many-to-many link Content <-> Subject.
# Rows are removed by the database (ON DELETE CASCADE) when either side is
# deleted, and a (content_id, subject_id) pair may appear only once.
content_subjects_table = Table(
    "ref_content_subjects",
    Base.metadata,
    Column(
        "content_id",
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        index=True,
    ),
    Column(
        "subject_id",
        Integer,
        ForeignKey("ref_subjects.id", ondelete="CASCADE"),
        index=True,
    ),
    UniqueConstraint("content_id", "subject_id"),
)

79 

80 

class DateStamped:
    """Mixin contributing `date_created` and `date_updated` timestamp columns.

    Both columns are NOT NULL and take their value from the database via
    `utc_timestamp()` rather than from Python.
    """

    # When the row was created (database-side default).
    date_created: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.utc_timestamp(),
    )

    # When this content was last updated.
    # NOTE(review): `server_onupdate` only informs SQLAlchemy that the server
    # produces a new value on UPDATE; it does not create that behaviour in the
    # database by itself - confirm the migrations/DDL provide it.
    date_updated: Mapped[datetime] = mapped_column(
        DateTime,
        nullable=False,
        server_default=func.utc_timestamp(),
        server_onupdate=func.utc_timestamp(),
    )

95 

96 

class PolicyMixin:
    """Mixin contributing an optional `auth_policy` column.

    The column stores a CEL expression (at most CEL_EXPRESSION_MAX_LENGTH
    characters); NULL means no policy has been stored on the row.
    """

    auth_policy: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=CEL_EXPRESSION_MAX_LENGTH),
        nullable=True,
        comment="CEL expression for authorization rules",
    )

103 

104 

class ContentSpec(Base, FSMEntity, DateStamped, PolicyMixin):
    """A named JSON Schema document, owned by an organisation.

    `Content` rows reference a spec through `Content.schema_id`, and
    `ContentSpecMap` rows reference it through `content_spec_id`.
    """

    __tablename__ = "ref_content_specs"

    # Row version counter; registered below as the mapper's version_id_col.
    version: Mapped[int] = mapped_column(
        Integer, nullable=False, server_default=text("1")
    )
    __mapper_args__ = {"version_id_col": version}

    name: Mapped[str] = mapped_column(VARCHAR(length=128), nullable=False)

    # New rows default to draft at the database level.
    is_draft: Mapped[bool] = mapped_column(
        BOOLEAN, nullable=False, server_default=true()
    )

    description: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=256), nullable=True
    )
    spec_doc: Mapped[dict[str, Any]] = mapped_column(
        JSON,
        nullable=False,
        comment="JSON Schema document to validate associated Content",
    )

    # Organisation that created this schema.
    org_id: Mapped[str] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )
    organisation: Mapped[Organisation] = relationship(
        Organisation, primaryjoin=org_id == Organisation.id
    )

    # Content items that follow this schema (inverse of Content.schema).
    contents: Mapped[list["Content"]] = relationship("Content", back_populates="schema")

    # Content maps defined for this spec; they are deleted together with it.
    content_maps: Mapped[list["ContentSpecMap"]] = relationship(
        "ContentSpecMap", back_populates="content_spec", cascade="all, delete-orphan"
    )

    @validates("spec_doc")
    def validate_spec(self, _key, schema_dict: dict) -> dict:
        """Check that `schema_dict` is a valid 2020-12 JSON Schema document.

        Runs whenever `spec_doc` is assigned. The validator is built only for
        the check and then discarded; the schema is returned unchanged so the
        assignment proceeds.
        """
        jsonschema_rs.Draft202012Validator(schema_dict)
        return schema_dict

    def jsonschema_validate(self, content_data) -> None:
        """Validate `content_data` against this spec's `spec_doc`.

        Returns nothing; any error raised by `jsonschema_rs.validate`
        propagates to the caller.
        """
        jsonschema_rs.validate(self.spec_doc, content_data)

    def __repr__(self) -> str:
        return f"<Schema: {self.name}>"

161 

162 

class Content(Base, FSMEntity, DateStamped, PolicyMixin):
    """A titled JSON document that follows a `ContentSpec` schema.

    Besides the document itself, a row records its authoring and editing
    organisations, the user who last updated it, its tags and subjects, its
    typed relationships to other content, and its revision history.
    """

    __tablename__ = "ref_contents"
    __table_args__ = (
        Index("ix_content_fts", "content_fts", mariadb_prefix="FULLTEXT"),
    )

    title: Mapped[str] = mapped_column(VARCHAR(length=256), nullable=False)
    content_doc: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=False)
    # Text column covered by the FULLTEXT index declared in __table_args__.
    content_fts: Mapped[Optional[str]] = mapped_column(TEXT, nullable=True)

    # Row version counter; registered below as the mapper's version_id_col.
    version: Mapped[int] = mapped_column(
        Integer, nullable=False, server_default=text("1")
    )
    __mapper_args__ = {"version_id_col": version}

    # Result of the most recent jsonschema_validate() call; false by default.
    is_validated: Mapped[bool] = mapped_column(
        BOOLEAN, nullable=False, server_default=false()
    )

    # Who authored this content
    author_org_id: Mapped[str] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
    )
    author_organisation: Mapped[Organisation] = relationship(
        Organisation, primaryjoin=author_org_id == Organisation.id
    )

    # Who last updated this content. The column is nullable and is set to NULL
    # when the user is deleted, so the annotation must be Optional.
    last_updated_by_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("users.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
    )
    last_updated_by: Mapped[Optional[User]] = relationship(
        User, primaryjoin=last_updated_by_id == User.id
    )

    # Schema this content follows, constrained to existing schemas for which is_draft=False
    schema_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("ref_content_specs.id", ondelete="CASCADE"),
        nullable=False,
    )
    schema: Mapped[ContentSpec] = relationship(ContentSpec, back_populates="contents")

    # Tags for this content (dynamic: attribute access yields a query object)
    tags = relationship(
        Tag,
        secondary=content_tags_table,
        passive_deletes=True,
        lazy="dynamic",
    )

    # Relationships with other content, split by direction
    outgoing_relationships: Mapped[list["ContentRelationship"]] = relationship(
        "ContentRelationship",
        foreign_keys="ContentRelationship.source_content_id",
        back_populates="source_content",
        cascade="all, delete-orphan",
    )

    incoming_relationships: Mapped[list["ContentRelationship"]] = relationship(
        "ContentRelationship",
        foreign_keys="ContentRelationship.target_content_id",
        back_populates="target_content",
        cascade="all, delete-orphan",
    )

    # Organization that has edit permission (if different from author org)
    editor_org_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,  # Null means only the author org can edit
    )
    editor_organisation: Mapped[Optional[Organisation]] = relationship(
        Organisation,
        foreign_keys=[editor_org_id],
        primaryjoin="Content.editor_org_id == Organisation.id",
    )

    # For single primary subject
    primary_subject_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("ref_subjects.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Subject.primary_for_contents already names this attribute in its
    # back_populates; declaring the inverse here keeps both sides in sync
    # in memory whichever side is modified.
    primary_subject: Mapped[Optional["Subject"]] = relationship(
        "Subject",
        foreign_keys=[primary_subject_id],
        back_populates="primary_for_contents",
    )

    # For multiple related subjects (dynamic: attribute access yields a query object)
    subjects = relationship(
        "Subject",
        secondary=content_subjects_table,
        passive_deletes=True,
        lazy="dynamic",
    )

    # Revision history; revisions are deleted together with the content
    revisions: Mapped[list["ContentRevision"]] = relationship(
        "ContentRevision",
        back_populates="content",
        cascade="all, delete-orphan",
    )

    # Helper methods for working with relationships
    def add_relationship(
        self,
        target_content: "Content",
        relationship_type: ContentRelationshipType,
        relationship_metadata: Optional[dict[str, Any]] = None,
    ) -> "ContentRelationship":
        """Create a relationship from this content to another content.

        Parameters
        ----------
        target_content : Content
            The target Content instance this content relates to.
        relationship_type : ContentRelationshipType
            Enum value describing the relationship.
        relationship_metadata : Optional[dict]
            Arbitrary JSON-serialisable metadata about the relationship. Stored in
            the `relationship_metadata` column; an empty dict is stored when
            nothing is supplied.

        Returns
        -------
        ContentRelationship
            The new relationship, already present in `outgoing_relationships`.
        """
        # Named `link` rather than `relationship` so the imported
        # sqlalchemy.orm.relationship() function is not shadowed in this scope.
        link = ContentRelationship(
            relationship_type=relationship_type,
            relationship_metadata=relationship_metadata or {},
        )
        # Two-phase setup. Append first: this assigns source_content through
        # back_populates and lets the save-update cascade carry the new object
        # into this content's session (if any).
        self.outgoing_relationships.append(link)
        # Only then assign the target side, so the object is not attached to
        # target_content's incoming collection while still outside the session
        # (which would trigger an SAWarning).
        link.target_content = target_content
        return link

    def jsonschema_validate(self, content_data, raise_on_error: bool = True) -> None:
        """
        Validate `content_data` against the "spec_doc" JSON Schema document of
        the associated ContentSpec, and set the is_validated flag to the result.

        Parameters
        ----------
        content_data : dict
            The content data to validate.
        raise_on_error : bool
            If True, re-raises the ValidationError on validation failure. If
            False, just sets is_validated to False.

        Raises
        ------
        ValueError
            If no schema is associated with this content yet.
        jsonschema_rs.ValidationError
            If validation fails and `raise_on_error` is True.
        """
        if self.schema is None:
            raise ValueError(
                "schema_id must be set and the instance flushed before validating"
            )
        try:
            jsonschema_rs.validate(self.schema.spec_doc, content_data)
        except jsonschema_rs.ValidationError:
            self.is_validated = False
            if raise_on_error:
                # Bare raise keeps the original traceback intact.
                raise
        else:
            self.is_validated = True

    @property
    def etag(self) -> str:
        """Generate a weak ETag for this content based on its ID and version."""
        return f'W/"{self.id}-{self.version}"'

    def __repr__(self) -> str:
        return f"<Content # {self.id}: {self.title}>"

342 

343 

class ContentRelationship(Base):
    """A typed link from a source content item to a target content item.

    Both foreign keys cascade on delete, so the row disappears when either
    content item is removed.
    """

    __tablename__ = "ref_content_relationships"

    id: Mapped[int] = mapped_column(primary_key=True)

    # The content item the relationship starts from.
    source_content_id: Mapped[int] = mapped_column(
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
    )
    # The content item the relationship points at.
    target_content_id: Mapped[int] = mapped_column(
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Persisted using the enum members' values rather than their names.
    relationship_type: Mapped[ContentRelationshipType] = mapped_column(
        SqlaEnum(
            ContentRelationshipType,
            name="content_relationship_type_enum",
            values_callable=lambda obj: [e.value for e in obj],
        ),
        nullable=False,
    )

    # Free-form JSON about the relationship (renamed from `metadata`).
    relationship_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSON, nullable=True
    )

    # ORM links to the two content objects; each pairs with the matching
    # collection on Content.
    source_content: Mapped["Content"] = relationship(
        "Content",
        foreign_keys=[source_content_id],
        back_populates="outgoing_relationships",
    )
    target_content: Mapped["Content"] = relationship(
        "Content",
        foreign_keys=[target_content_id],
        back_populates="incoming_relationships",
    )

382 

383 

class SubjectType(Enum):
    """Categories of subject that a piece of content can be about."""

    # National entity
    COUNTRY = "country"
    # Geographic region (continent, economic zone, etc.)
    REGION = "region"
    # Company, bank, institution
    ORGANIZATION = "organization"
    # Stock exchange, trading venue
    MARKET = "market"
    # Industry sector or segment
    SECTOR = "sector"
    # Specific product or service
    PRODUCT = "product"
    # For any other subject type
    OTHER = "other"

394 

395 

class Subject(Base, PolicyMixin):
    """Represents entities (countries, organizations, etc.) that content can be about"""

    __tablename__ = "ref_subjects"

    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(VARCHAR(length=128), nullable=False, index=True)
    code: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=32), nullable=True, index=True
    )
    description: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=512), nullable=True
    )

    # Persisted using the enum members' values rather than their names.
    subject_type: Mapped[SubjectType] = mapped_column(
        SqlaEnum(
            SubjectType,
            name="subject_type_enum",
            values_callable=lambda obj: [e.value for e in obj],
        ),
        nullable=False,
        index=True,
    )

    # For hierarchical relationships (e.g., city -> state -> country)
    parent_id: Mapped[Optional[int]] = mapped_column(
        Integer, ForeignKey("ref_subjects.id", ondelete="SET NULL"), nullable=True
    )
    # Self-referential link; the backref adds a `children` collection.
    parent: Mapped[Optional["Subject"]] = relationship(
        "Subject", remote_side="Subject.id", backref="children"
    )

    # Org that manages this subject (optional, for custom subjects)
    managing_org_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("organisations.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
    )
    managing_organization: Mapped[Optional[Organisation]] = relationship(Organisation)

    # Metadata for the subject (can store country codes, market identifiers, etc.)
    subject_metadata: Mapped[Optional[dict[str, Any]]] = mapped_column(
        JSON, nullable=True
    )

    # Content that has this as primary subject
    primary_for_contents: Mapped[list[Content]] = relationship(
        Content,
        back_populates="primary_subject",
        foreign_keys=[Content.primary_subject_id],
    )

    # Content that references this subject (read-only view over the
    # association table; dynamic, so attribute access yields a query object)
    referenced_by_contents = relationship(
        Content,
        secondary=content_subjects_table,
        viewonly=True,
        lazy="dynamic",
    )

    def __repr__(self) -> str:
        # subject_type is None on an instance where it has not been set yet;
        # repr() is used in logs and debuggers and must not raise there.
        kind = self.subject_type.value if self.subject_type is not None else None
        return f"<Subject: {self.name} ({kind})>"

458 

459 

class ContentRevision(Base):
    """Base model for tracking Content changes via JSON Patch"""

    __tablename__ = "ref_content_revisions"

    id: Mapped[int] = mapped_column(primary_key=True)
    # NOTE(review): nothing in this module shows which values entity_type
    # takes - confirm against the code that writes revisions.
    entity_type: Mapped[str] = mapped_column(
        VARCHAR(length=32), nullable=False, index=True
    )
    # The content item this revision belongs to; revisions are removed with it.
    content_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("ref_contents.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # When the revision was recorded (database-side default).
    timestamp: Mapped[datetime] = mapped_column(
        DateTime, nullable=False, server_default=func.utc_timestamp(), index=True
    )

    # User who made the change. The column is nullable and is set to NULL when
    # the user is deleted, so the annotation must be Optional.
    user_id: Mapped[Optional[str]] = mapped_column(
        VARCHAR(length=50),
        ForeignKey("users.id", ondelete="SET NULL", onupdate="CASCADE"),
        nullable=True,
        index=True,
    )

    # The JSON Patch operations that were applied
    patch_operations: Mapped[list[dict[str, Any]]] = mapped_column(JSON, nullable=False)

    # Optional comment describing the change
    comment: Mapped[Optional[str]] = mapped_column(VARCHAR(length=512), nullable=True)

    # Version number of the content to which this revision was applied
    version_number: Mapped[int] = mapped_column(
        Integer, nullable=False, server_default=text("1")
    )

    content: Mapped["Content"] = relationship("Content", back_populates="revisions")

    user: Mapped[Optional[User]] = relationship(User)

501 

502 

class ContentSpecMap(Base):
    """
    A named set of mappings between Question Elements (QElement) and Content fields.

    A map belongs to one ContentSpec and owns a list of ContentQElementPair
    rows. Each pair ties one question element to one location in the content's
    JSON document, which is how questionnaire answers can be used to populate
    structured content.

    Example:
        A vendor selection RFP might have a question "What is your SLA uptime guarantee?"
        This mapping would connect that question element to the "sla.uptime" field
        in an SLA content document.
    """

    __tablename__ = "ref_content_maps"

    name: Mapped[str] = mapped_column(VARCHAR(length=256), nullable=False)
    description: Mapped[str] = mapped_column(VARCHAR(length=1024), nullable=False)

    # Owning spec; the map row is removed when the spec is deleted.
    content_spec_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("ref_content_specs.id", ondelete="CASCADE"), nullable=False
    )

    # Relationships
    content_spec: Mapped[ContentSpec] = relationship(
        ContentSpec, back_populates="content_maps"
    )
    # The individual element-to-field pairs; deleted together with the map.
    pairs: Mapped[list["ContentQElementPair"]] = relationship(
        "ContentQElementPair",
        back_populates="content_map",
        cascade="all, delete-orphan",
    )

    # Read-only shortcut to the QElements reached through the pairs table
    # (dynamic, so attribute access yields a query object).
    question_elements = relationship(
        "QElement", secondary="ref_content_qelements", viewonly=True, lazy="dynamic"
    )

    def __repr__(self):
        return f"<ContentSpecMap ID {self.id}, name: {self.name}>"

542 

543 

class ContentQElementPair(Base):
    """
    One entry of a ContentSpecMap: a QElement paired with a Content field.

    `content_reference` holds the expression that identifies where, inside the
    content's JSON document, the answer to the question element belongs.

    Examples of content_reference values:
        - "$.sla.uptime" -> content_doc["sla"]["uptime"]
        - "$.contact.email" -> content_doc["contact"]["email"]
        - "$.features[0].name" -> content_doc["features"][0]["name"]

    NOTE(review): the column comment calls this a JSON Pointer, but the
    examples above use JSONPath-style notation; an RFC 6901 JSON Pointer for
    the first example would be "/sla/uptime". Confirm which syntax the
    resolver actually expects.
    """

    __tablename__ = "ref_content_qelements"

    # Owning map; the pair is removed when the map is deleted.
    content_map_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("ref_content_maps.id", ondelete="CASCADE"), nullable=False
    )

    # Mapped question element; the pair is removed when the element is deleted.
    question_element_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("question_elements.id", ondelete="CASCADE"), nullable=False
    )
    content_reference: Mapped[str] = mapped_column(
        VARCHAR(length=256),
        nullable=False,
        comment="A JSON Pointer expression to resolve a value in the Content item's json_doc",
    )

    # Relationships
    content_map: Mapped[ContentSpecMap] = relationship(
        ContentSpecMap, back_populates="pairs"
    )
    question_element: Mapped["QElement"] = relationship("QElement")

    def __repr__(self):
        return f"<ContentQElementPair ID {self.id}, qel_id: {self.question_element_id}, ref: {self.content_reference}>"

582 

583 

def extract_text_from_json(data: Any) -> str:
    """Return every string value found in a JSON-like structure, space-joined.

    Dicts are searched through their values (keys are not included) and lists
    through their items, depth-first and in iteration order. Values of any
    other non-string type (numbers, booleans, None, ...) contribute nothing.
    """

    def collect(node: Any) -> list[str]:
        # A string is a leaf; everything except dict/list is ignored.
        if isinstance(node, str):
            return [node]
        if isinstance(node, dict):
            children = node.values()
        elif isinstance(node, list):
            children = node
        else:
            return []

        found: list[str] = []
        for child in children:
            found.extend(collect(child))
        return found

    return " ".join(collect(data))

601 

602 

@event.listens_for(Content, "before_insert")
def validate_schema_not_draft_before_insert(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Reject the INSERT of a Content row whose ContentSpec is still a draft.

    The check is skipped when `target` is not attached to a session or when no
    ContentSpec is found for `target.schema_id`.

    Raises
    ------
    ValueError
        If the referenced ContentSpec has `is_draft` set.
    """
    from sqlalchemy.orm import object_session

    owning_session = object_session(target)
    if owning_session is None:
        return

    # Look the schema up by primary key to read its is_draft flag.
    spec = owning_session.get(ContentSpec, target.schema_id)
    if spec is None or not spec.is_draft:
        return

    raise ValueError(
        f"Cannot create Content with draft ContentSpec (id={target.schema_id}). "
        f"Schema '{spec.name}' must be published (is_draft=False) before creating content."
    )

619 

620 

@event.listens_for(Content, "before_insert")
def update_content_fts_before_insert(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Fill `content_fts` from the strings in `content_doc` before the INSERT."""
    searchable_text = extract_text_from_json(target.content_doc)
    target.content_fts = searchable_text

626 

627 

@event.listens_for(Content, "before_update")
def validate_schema_not_draft_before_update(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Reject an UPDATE that points a Content row at a draft ContentSpec.

    Only runs the check when `schema_id` has pending changes. It is skipped
    when `target` is not attached to a session or when no ContentSpec is found
    for the new `schema_id`.

    Raises
    ------
    ValueError
        If the newly referenced ContentSpec has `is_draft` set.
    """
    from sqlalchemy.orm import object_session
    from sqlalchemy import inspect

    owning_session = object_session(target)
    if owning_session is None:
        return

    # Nothing to verify unless schema_id itself was modified.
    schema_id_history = inspect(target).attrs.schema_id.history
    if not schema_id_history.has_changes():
        return

    # Look the new schema up by primary key to read its is_draft flag.
    spec = owning_session.get(ContentSpec, target.schema_id)
    if spec is None or not spec.is_draft:
        return

    raise ValueError(
        f"Cannot update Content to use draft ContentSpec (id={target.schema_id}). "
        f"Schema '{spec.name}' must be published (is_draft=False)."
    )

648 

649 

@event.listens_for(Content, "before_update")
def update_content_fts_before_update(
    mapper: Any, connection: Any, target: Content
) -> None:
    """Refresh `content_fts` from the strings in `content_doc` before the UPDATE."""
    searchable_text = extract_text_from_json(target.content_doc)
    target.content_fts = searchable_text

655 

656 

# Public API of this module: the mapped classes and the enums they use.
__all__ = [
    # Enums
    "ContentRelationshipType",
    "SubjectType",
    # Content and its schema
    "Content",
    "ContentSpec",
    "ContentRelationship",
    "ContentRevision",
    # Subjects
    "Subject",
    # Questionnaire-to-content mapping
    "ContentSpecMap",
    "ContentQElementPair",
]