spotterbase.corpora package

Submodules

spotterbase.corpora.document_queries module

spotterbase.corpora.document_queries.document_iterable_from_query(query: str, doc_var: str = 'doc', endpoint: SparqlEndpoint | None = None) -> Iterable[Document]

spotterbase.corpora.interface module

exception spotterbase.corpora.interface.CannotLocateCorpusDataError

Bases: Exception

The corpus data cannot be found (e.g. because no path was provided).

class spotterbase.corpora.interface.Corpus

Bases: ABC

abstract get_document(uri: Uri) -> Document

Implementations should raise DocumentNotInCorpusException if the requested document is not part of the corpus.

get_documents() -> Iterator[Document]
abstract get_uri() -> Uri
class spotterbase.corpora.interface.Document

Bases: ABC

get_html_tree(*, cached: bool) -> _ElementTree
get_node_for_id(node_id: str) -> _Element
get_offset_converter() -> OffsetConverter
get_selector_converter() -> SelectorConverter
abstract get_uri() -> Uri
has_cached_tree() -> bool
abstract open_binary() -> IO[bytes]
open_text(encoding: str = 'utf-8') -> TextIO
to_dom(arg: FragmentTarget | PathSelector | OffsetSelector) -> tuple[DomRange, list[DomRange] | None]
exception spotterbase.corpora.interface.DocumentNotFoundError

Bases: Exception

The document is part of the corpus, but it cannot be found right now (e.g. the file does not exist).

exception spotterbase.corpora.interface.DocumentNotInCorpusException

Bases: Exception

The requested document is not part of the corpus.

spotterbase.corpora.local_file_corpus module

class spotterbase.corpora.local_file_corpus.LocalDocument(uri: Uri, path: Path)

Bases: Document

get_uri() -> Uri
open_binary() -> IO[bytes]
spotterbase.corpora.local_file_corpus.load()

spotterbase.corpora.resolver module

class spotterbase.corpora.resolver.Resolver

Bases: object

classmethod get_corpus(uri: str | Uri | URIRef | Path | VocabularyMeta) -> Corpus | None
classmethod get_document(uri: str | Uri | URIRef | Path | VocabularyMeta) -> Document | None
classmethod get_document_or_fail(uri: str | Uri | URIRef | Path | VocabularyMeta) -> Document
classmethod get_known_corpora() -> Iterable[Corpus]
classmethod register_corpus(corpus: Corpus)

spotterbase.corpora.test_corpus module

class spotterbase.corpora.test_corpus.TestDocument(uri: Uri, path: Path)

Bases: Document

get_uri() -> Uri
open_binary() -> IO[bytes]
spotterbase.corpora.test_corpus.load()

spotterbase.corpora.write_document_to_file module

spotterbase.corpora.write_document_to_file.main()

Module contents