Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ Cantanhêde, S. d. O., Naaijer, M., Højgaard, C. C., & Glanz, O. (2026). Identi

### Use of the dataset

You can use the dataset freely for research and education. If you do so, please refer to the paper. Also refer to the dataset in the following way:
You can use the dataset freely for research and education. If you do so, please refer to the papers. Also refer to the dataset in the following way:

Christian Canu Højgaard, Martijn Naaijer, & Stefan Schorch. (2023).
Text-Fabric Dataset of the Samaritan Pentateuch. Zenodo.
Christian Canu Højgaard, Martijn Naaijer, & Stefan Schorch. (2023).
Text-Fabric Dataset of the Samaritan Pentateuch. Zenodo.
https://doi.org/10.5281/zenodo.7734632

You can also refer to specific versions of the dataset.
Expand Down Expand Up @@ -70,9 +70,13 @@ Version
- 6.0.0 February 2026 addition of phrase atoms.

### Features
Currently, the following features exist for all books:
Currently, the following features exist for all books:
- book
- chapter
- verse
- g_cons
- lex
- gloss
- sp
- g_vbs
- g_pfm
Expand All @@ -83,11 +87,13 @@ Currently, the following features exist for all books:
- g_prs
- vt
- ps
- prs_ps
- nu
- prs_nu
- gn
- prs_ps
- prs_nu
- prs_gn
- language
- trailer

### Textual issues
Some annotations are dubious due to idiosyncracies in the SP manuscript used for this project. The issues are documented in the folder textual_issues.
Some annotations are dubious due to idiosyncrasies in the SP manuscript used for this project. The issues are documented in the folder textual_issues.
2 changes: 1 addition & 1 deletion app/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ docs:
interfaceDefaults: {}
provenanceSpec:
corpus: The Samaritan Pentateuch
version: 6.0.3
version: 6.1
writing: hbo
typeDisplay:
verse:
Expand Down
16 changes: 14 additions & 2 deletions tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# Point Text-Fabric at the dataset directory built from ROOT_DIR, TF_FOLDER and
# latest_data_folder (all three are defined outside this excerpt).
TF = Fabric(locations=os.path.join(ROOT_DIR, TF_FOLDER, latest_data_folder))
# Load the features named in the whitespace-separated list below; the tests in
# this module read them through `F.<feature>.v(node)`.
api = TF.load('''
otype g_cons_raw g_cons g_cons_utf8 lex g_pfm g_vbs g_lex g_vbe g_nme g_uvf g_prs sp vt ps nu gn prs_nu prs_ps prs_gn trailer ETCBC_parsing
otype g_cons_raw g_cons g_cons_utf8 lex gloss language g_pfm g_vbs g_lex g_vbe g_nme g_uvf g_prs sp vt ps nu gn prs_nu prs_ps prs_gn trailer ETCBC_parsing
''')
api.loadLog()
# Publish the API members into this module's globals; presumably this is what
# binds the name `F` used by the tests below -- verify against the Text-Fabric docs.
api.makeAvailableIn(globals())
Expand Down Expand Up @@ -112,6 +112,18 @@ def test_lexemes_advb_conj_prep_pron_nega_inrg_intj_ending():
def test_lexemes_subs_adjv_ending():
    """Lexemes of words whose `sp` is 'subs', 'nmpr' or 'adjv' must end in '/'."""
    nominal_classes = {'subs', 'nmpr', 'adjv'}
    nominal_words = [node for node in F.otype.s('word') if F.sp.v(node) in nominal_classes]
    # Collect the distinct final characters; only '/' is acceptable.
    final_chars = {F.lex.v(node)[-1] for node in nominal_words}
    assert final_chars <= {'/'}

def test_glosses_content():
    """Every word node must carry a non-empty gloss.

    The check is a truthiness test rather than ``!= ''``: a feature lookup
    that yields ``None`` for a word without a gloss would satisfy
    ``None != ''`` and slip through unnoticed. This mirrors how
    ``test_expected_verbal_ending`` checks ``g_vbe``.
    """
    words_without_gloss = [w for w in F.otype.s('word') if not F.gloss.v(w)]
    # Show a handful of offending nodes so a failure is actionable.
    assert not words_without_gloss, words_without_gloss[:10]

def test_glosses_consistency():
    """Each lexeme may only have one gloss.

    Groups the gloss of every word node by its lexeme and fails when any
    lexeme ends up with more than one distinct gloss.

    The earlier form called ``all()`` on the set of offending lexemes, which
    is true both for an empty set and for a set of non-empty strings, so the
    test could not fail.
    """
    glosses_by_lexeme = collections.defaultdict(set)
    for w in F.otype.s('word'):
        glosses_by_lexeme[F.lex.v(w)].add(F.gloss.v(w))
    conflicting = {
        lexeme: glosses
        for lexeme, glosses in glosses_by_lexeme.items()
        if len(glosses) > 1
    }
    # Report a small sample of the conflicts instead of a bare AssertionError.
    assert not conflicting, dict(list(conflicting.items())[:10])

def test_language():
    """The `language` feature of every word node must be 'Hebrew' or 'Aramaic'."""
    known_languages = {'Hebrew', 'Aramaic'}
    seen_languages = {F.language.v(node) for node in F.otype.s('word')}
    # Any value outside the known set (including a missing value) fails the test.
    assert seen_languages <= known_languages

def test_unexpected_preformative():
    """Words whose `sp` is not 'verb' must have no (truthy) `g_pfm` value."""
    non_verbs = [node for node in F.otype.s('word') if F.sp.v(node) != 'verb']
    preformatives = {F.g_pfm.v(node) for node in non_verbs}
    assert not any(preformatives)

Expand All @@ -122,7 +134,7 @@ def test_unexpected_verbal_stem():
assert all({not F.g_vbs.v(w) for w in F.otype.s('word') if F.sp.v(w) not in {'verb'}})

def test_allowed_verbal_stem():
    """The `g_vbs` value of every word node must be one of the allowed markers.

    Only the updated marker set (the one that includes ']X]') is checked; the
    older assertion without ']X]' contradicted it and is dropped. On failure
    the unexpected values are reported.
    """
    allowed_stems = {
        '', ']]', ']H]', ']N]', ']T]', ']HT]', ']W]', ']CT]',
        ']HW]', ']HCT]', ']S]', ']>]', ']F]', ']HF]', ']Y]', ']X]',
    }
    unexpected = {F.g_vbs.v(w) for w in F.otype.s('word')} - allowed_stems
    assert not unexpected, unexpected

def test_expected_verbal_ending():
    """Every word whose `sp` is 'verb' must have a truthy `g_vbe` value."""
    verbs = [node for node in F.otype.s('word') if F.sp.v(node) == 'verb']
    has_ending = {bool(F.g_vbe.v(node)) for node in verbs}
    assert False not in has_ending
Expand Down
2 changes: 1 addition & 1 deletion textual_issues/issues.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"0": {"data_version": "4.1", "tf_node": 412335, "section": ["Genesis", 15, 8], "explanation": "The second yod is interpreted as a dittography, but the first yod may also be a vowel indicator and the second part of the lexeme"}, "1": {"data_version": "4.1", "tf_node": 430624, "section": ["Genesis", 44, 10], "explanation": "The word is interpreted as the verb NQH in Qal passive participle realized with an aleph"}, "2": {"data_version": "4.1", "tf_node": 433530, "section": ["Genesis", 49, 12], "explanation": "Florentin and Tal suggests a plural perfective verb with four radicals(?) or a noun. It is interpreted here simply as a singular, masculine adjective"}, "3": {"data_version": "4.1", "tf_node": 446025, "section": ["Exodus", 19, 24], "explanation": "The word K is interpreted as the imperative of HLK and should have been LK"}, "4": {"data_version": "4.1", "tf_node": 470812, "section": ["Leviticus", 19, 18], "explanation": "FLorentin and Tal interpret the verb as an imperfect of VWR[, but it does not explain the presence of an H in this manuscript"}, "5": {"data_version": "4.1", "tf_node": 490667, "section": ["Numbers", 21, 18], "explanation": "The word is interpreted as a qal perfect of KRH[. To account for the aleph, it as treated as a univalent final"}, "6": {"data_version": "4.1", "tf_node": 492881, "section": ["Numbers", 24, 22], "explanation": "Following Florentin and Tal, the word is interpreted as a noun with pronominal suffix"}, "7": {"data_version": "4.1", "tf_node": 506367, "section": ["Deuteronomy", 9, 21], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "8": {"data_version": "4.1", "tf_node": 508836, "section": ["Deuteronomy", 13, 15], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. 
On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "9": {"data_version": "4.1", "tf_node": 510499, "section": ["Deuteronomy", 17, 4], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "10": {"data_version": "4.1", "tf_node": 515248, "section": ["Deuteronomy", 27, 8], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "11": {"data_version": "4.1", "tf_node": 518074, "section": ["Deuteronomy", 31, 2], "explanation": "The verb is clearly meant to be BW>[ but the initial > is confusing. We suspect it is a scribal error"}, "12": {"data_version": "4.1", "tf_node": 519665, "section": ["Deuteronomy", 33, 2], "explanation": "According to Florentin and Tal, some manuscripts seperate the word into two with a dot. We do not know if this is the case for this manuscript, and we treat it currently as a single word"}}
{"0": {"data_version": "4.1", "tf_node": 412335, "section": ["Genesis", 15, 8], "explanation": "The second yod is interpreted as a dittography, but the first yod may also be a vowel indicator and the second part of the lexeme"}, "1": {"data_version": "4.1", "tf_node": 430624, "section": ["Genesis", 44, 10], "explanation": "The word is interpreted as the verb NQH in Qal passive participle realized with an aleph"}, "2": {"data_version": "4.1", "tf_node": 433530, "section": ["Genesis", 49, 12], "explanation": "Florentin and Tal suggests a plural perfective verb with four radicals(?) or a noun. It is interpreted here simply as a singular, masculine adjective"}, "3": {"data_version": "4.1", "tf_node": 446025, "section": ["Exodus", 19, 24], "explanation": "The word K is interpreted as the imperative of HLK and should have been LK"}, "4": {"data_version": "4.1", "tf_node": 470812, "section": ["Leviticus", 19, 18], "explanation": "FLorentin and Tal interpret the verb as an imperfect of VWR[, but it does not explain the presence of an H in this manuscript"}, "5": {"data_version": "4.1", "tf_node": 490667, "section": ["Numbers", 21, 18], "explanation": "The word is interpreted as a qal perfect of KRH[. To account for the aleph, it as treated as a univalent final"}, "6": {"data_version": "4.1", "tf_node": 492881, "section": ["Numbers", 24, 22], "explanation": "Following Florentin and Tal, the word is interpreted as a noun with pronominal suffix"}, "7": {"data_version": "4.1", "tf_node": 506367, "section": ["Deuteronomy", 9, 21], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "8": {"data_version": "4.1", "tf_node": 508836, "section": ["Deuteronomy", 13, 15], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. 
On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "9": {"data_version": "4.1", "tf_node": 510499, "section": ["Deuteronomy", 17, 4], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "10": {"data_version": "4.1", "tf_node": 515248, "section": ["Deuteronomy", 27, 8], "explanation": "Florentin and Tal follow Ben-Hayyim in calling the H an exclamatory. On the other hand, they consider the verb an adverbial and a non-substantive variation to the Masoretic infinitiv absolute. It is treated here as an hiphil infinitive absolute"}, "11": {"data_version": "4.1", "tf_node": 518074, "section": ["Deuteronomy", 31, 2], "explanation": "The verb is clearly meant to be BW>[ but the initial > is confusing. We suspect it is a scribal error"}, "12": {"data_version": "4.1", "tf_node": 519665, "section": ["Deuteronomy", 33, 2], "explanation": "According to Florentin and Tal, some manuscripts seperate the word into two with a dot. 
We do not know if this is the case for this manuscript, and we treat it currently as a single word"}, "13": {"data_version": "6.3", "tf_node": 443343, "section": ["Exodus", 15, 6], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "14": {"data_version": "6.3", "tf_node": 443406, "section": ["Exodus", 15, 11], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "15": {"data_version": "6.3", "tf_node": 445973, "section": ["Exodus", 19, 21], "explanation": "Considered a scribal error (substitution) and harmonized with the MT"}, "16": {"data_version": "6.3", "tf_node": 446040, "section": ["Exodus", 19, 24], "explanation": "Considered a scribal error (substitution) and harmonized with the MT"}, "17": {"data_version": "6.3", "tf_node": 453853, "section": ["Exodus", 32, 20], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "18": {"data_version": "6.3", "tf_node": 465206, "section": ["Leviticus", 11, 30], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "19": {"data_version": "6.3", "tf_node": 465209, "section": ["Leviticus", 11, 30], "explanation": "Considered a scribal error (omission) and harmonized with the MT"}, "20": {"data_version": "6.3", "tf_node": 483615, "section": ["Numbers", 11, 5], "explanation": "Considered a scribal error (omission) and harmonized with the MT"}, "21": {"data_version": "6.3", "tf_node": 484093, "section": ["Numbers", 11, 25], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "22": {"data_version": "6.3", "tf_node": 484127, "section": ["Numbers", 11, 26], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "23": {"data_version": "6.3", "tf_node": 484143, "section": ["Numbers", 11, 27], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "24": {"data_version": "6.3", "tf_node": 508812, "section": 
["Deuteronomy", 13, 14], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "25": {"data_version": "6.3", "tf_node": 509028, "section": ["Deuteronomy", 14, 5], "explanation": "Considered a scribal error (substitution) and harmonized with the core SP tradition"}, "26": {"data_version": "6.3", "tf_node": 509644, "section": ["Deuteronomy", 15, 9], "explanation": "Considered a scribal error (metathesis) and harmonized with the MT"}, "27": {"data_version": "6.3", "tf_node": 512690, "section": ["Deuteronomy", 21, 20], "explanation": "Considered a scribal error (substitution) and harmonized with the MT"}, "28": {"data_version": "6.3", "tf_node": 519143, "section": ["Deuteronomy", 32, 22], "explanation": "Considered a scribal error (substitution) and harmonized with the core SP tradition"}}
Loading
Loading