spotterbase.convert package

Submodules

spotterbase.convert.anno_query_to_jsonld module

spotterbase.convert.anno_query_to_jsonld.main()

spotterbase.convert.document_to_json module

class spotterbase.convert.document_to_json.Doc2JsonConverter(include_replaced_nodes: bool = True, skip_titles: bool = False, tokenize: bool = True)

Bases: object

process(document: Document) -> dict
class spotterbase.convert.document_to_json.Doc2JsonConverterCmdFactory

Bases: object

create() -> Doc2JsonConverter
spotterbase.convert.document_to_json.main()

spotterbase.convert.html_tokenize module

class spotterbase.convert.html_tokenize.HtmlTokenizer(add_word_ids: bool = False, word_class: str | None = None, nodes_to_ignore: set[str] | None = None)

Bases: object

process(document: Document) -> _ElementTree
spotterbase.convert.html_tokenize.main()

spotterbase.convert.normalize_selectors module

spotterbase.convert.normalize_selectors.get_document_cached(document_uri: Uri) -> Document
spotterbase.convert.normalize_selectors.main()
spotterbase.convert.normalize_selectors.normalize_target(target: FragmentTarget, document: Document)
spotterbase.convert.normalize_selectors.process(input_json_records: list) -> list

Module contents