Veliki slovensko-nemški slovar

From LexBib

Veliki slovensko-nemški slovar on Wikibase

The item describing this dictionary is Q34170.

Entries

Slovene-German dictionary

Display senses and sense translations using this query.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# One row per (entry, sense): lemma, POS, other labels, number of usage
# examples, sense indicator definitions and headword translations.
select distinct ?entry ?lemma (lang(?lemma) as ?lang) ?posLabel
  (group_concat(distinct concat(?label," (",?labeltype,")"); SEPARATOR=" | ") as ?otherLabels)
  (count(distinct ?expl) as ?examples)
  ?sense ?sense_num
  (group_concat(distinct str(?def); SEPARATOR =", ") as ?definitions)
  (group_concat(distinct str(?trans); SEPARATOR=" | ") as ?translations)


where { 
  # P5 Q111: instance of dmlex entry; P207: part of dictionary Q34170
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory ?pos.
  # P203: dmlex label item; P191: its label type — both labels in English
  optional {?entry ldp:P203 ?dmlex_label. ?dmlex_label rdfs:label ?label. filter(lang(?label)="en")
            ?dmlex_label ldp:P191 [rdfs:label ?labeltype]. filter(lang(?labeltype)="en")}
  # the numeric sense index is the tail of the sense IRI after ".../L<id>-S"
  optional {?entry ontolex:sense ?sense. bind (replace(str(?sense), "https://lexbib.elex.is/entity/L[0-9]+-S", "") as ?sense_num)
      optional {?sense ldp:P218 ?def.}   # P218: definition
      optional {?sense ldp:P217 ?trans.} # P217: headword translation
           }
  # P213: usage example, attached either to the entry or to one of its senses
  # (an unused optional binding ?pron was removed here: it was never
  # projected or aggregated and only added work for the query engine)
  optional {{?entry ldp:P213 ?expl.} union {?entry ontolex:sense [ldp:P213 ?expl].}}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  } group by ?entry ?lemma ?posLabel ?otherLabels ?examples ?sense ?sense_num
    ?definitions ?translations 
order by lcase(?lemma) xsd:integer(?sense_num)

Try it!

Slovene-German dictionary: Sense-alignment with Wikidata

This gives a list of all entries, plus German sense descriptions from Wikidata in case the German "headword translation" statement in the sense is qualified with such alignment.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Entries with their senses; where a sense's headword-translation statement
# (P217) carries a Wikidata sense alignment qualifier (P222), the German
# gloss of that Wikidata sense is fetched via a federated query.
select distinct ?entry ?lemma (lang(?lemma) as ?lang) ?posLabel
  (group_concat(distinct concat(?label," (",?labeltype,")"); SEPARATOR=" | ") as ?otherLabels)
  ?sense ?sense_num
  (group_concat(distinct str(?def); SEPARATOR =", ") as ?definitions)
  (group_concat(distinct str(?trans); SEPARATOR=" | ") as ?translations)
  ?wikidata_sense ?wikidata_gloss

where { 
  # P5 Q111: dmlex entry; P207: part of dictionary Q34170
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory ?pos.
  optional {?entry ldp:P203 ?dmlex_label. ?dmlex_label rdfs:label ?label. filter(lang(?label)="en")
            ?dmlex_label ldp:P191 [rdfs:label ?labeltype]. filter(lang(?labeltype)="en")}
  # the numeric sense index is the tail of the sense IRI after ".../L<id>-S"
  optional {?entry ontolex:sense ?sense. bind (replace(str(?sense), "https://lexbib.elex.is/entity/L[0-9]+-S", "") as ?sense_num)
      optional {?sense ldp:P218 ?def.}
      # walk the full P217 statement node to reach the P222 qualifier
      optional {?sense lp:P217 ?trans_st. ?trans_st lps:P217 ?trans.
      optional {?trans_st lpq:P222 ?wd_sense. bind(iri(concat(str(wd:),?wd_sense)) as ?wikidata_sense)
  # federated lookup of the German gloss on the aligned Wikidata sense
  SERVICE <https://query.wikidata.org/sparql> { select ?wikidata_sense ?wikidata_gloss where {
             ?wikidata_sense skos:definition ?wikidata_gloss. filter(lang(?wikidata_gloss)="de")
           }}}}}  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  } group by ?entry ?lemma ?posLabel ?otherLabels ?sense ?sense_num
    ?definitions ?translations ?wikidata_sense ?wikidata_gloss
order by lcase(?lemma) xsd:integer(?sense_num)

Try it!


Entry content statistics

This query lists the entries of this dictionary, with some statistics about their content.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Per-entry content statistics: counts of pronunciations, senses,
# definitions, translations and usage examples.
select distinct ?entry ?lemma (lang(?lemma) as ?lang) ?pos ?posLabel
  (group_concat(distinct concat(?label," (",?labeltype,")"); SEPARATOR=" | ") as ?otherLabels)
  (count(distinct ?pron) as ?pronunciations)
  (count(distinct ?sense) as ?senses)
  (count(distinct ?def) as ?definitions)
  (count(distinct ?trans) as ?translations)
  (count(distinct ?expl) as ?examples)

where { 
  # P5 Q111: dmlex entry; P207: part of dictionary Q34170
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory ?pos.
  optional {?entry ldp:P203 ?dmlex_label. ?dmlex_label rdfs:label ?label. filter(lang(?label)="en")
            ?dmlex_label ldp:P191 [rdfs:label ?labeltype]. filter(lang(?labeltype)="en")}
  optional {?entry ontolex:sense ?sense.
      optional {?sense ldp:P218 ?def.}   # P218: definition
      optional {?sense ldp:P217 ?trans.} # P217: headword translation
           }
  # P204 pronunciations and P213 examples may sit on the entry or a sense
  optional {{?entry ldp:P204 ?pron.} union {?entry ontolex:sense [ldp:P204 ?pron].}}
  optional {{?entry ldp:P213 ?expl.} union {?entry ontolex:sense [ldp:P213 ?expl].}}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  # fixed typo: GROUP BY previously listed "?pronounciations", which never
  # matched the projected ?pronunciations alias
  } group by ?entry ?lemma ?pos ?posLabel ?otherLabels ?pronunciations ?senses ?definitions ?translations ?examples
order by lcase(?lemma) limit 10000 # without limit, this query is actually timing out

Try it!


POS distribution

The query shows the distribution of POS values in this dictionary.

#defaultView:BarChart
PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>

# Number of entries per part-of-speech value in this dictionary.
select distinct ?pos ?posLabel (count(?entry) as ?num_of_entries)
where { 
  # P5 Q111: dmlex entry; P207: part of dictionary Q34170
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lexicalCategory ?pos.  
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  # group only by the grouping keys; the aggregate alias ?num_of_entries is
  # unbound at grouping time and must not appear in GROUP BY (SPARQL 1.1)
  } group by ?pos ?posLabel
order by desc(?num_of_entries)

Try it!


Controlled values

This SPARQL query shows the content of the controlled values block in this dictionary, how often each of them is used in the data, and alignments to Wikidata that have been set to the controlled values.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Controlled-value items of this dictionary: usage count, source literal
# (P38), label type (P191), and Wikidata alignment (P212) with English
# description fetched via a federated query.
select distinct ?tag (count(distinct ?tag_subject) as ?uses) ?source_literal ?tagLabel ?tagType ?tagTypeLabel ?labelTypeTag ?labelTypeTagLabel ?wikidata_ref ?wikidata_refLabel ?wikidata_desc

where { 
  # dmlex tag types (duplicate lwb:Q104 removed from the original list)
  values ?tagType {lwb:Q104 lwb:Q101 lwb:Q102 lwb:Q103 lwb:Q105 lwb:Q106 lwb:Q107}
  ?tag ldp:P207 lwb:Q34170; ldp:P5 ?tagType; ldp:P38 ?source_literal.
  # any statement pointing at the tag counts as one use
  optional {?tag_subject ?p ?tag.}
  optional {?tag ldp:P191 ?labelTypeTag.}
  optional {?tag ldp:P212 ?wdref. bind(iri(concat(str(wd:),?wdref)) as ?wikidata_ref)
           SERVICE <https://query.wikidata.org/sparql> { select ?wikidata_ref ?wikidata_refLabel ?wikidata_desc where {
             ?wikidata_ref schema:description ?wikidata_desc. filter(lang(?wikidata_desc)="en")
            SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}}
           }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  } group by ?tag ?uses ?source_literal ?tagLabel ?tagType ?tagTypeLabel ?labelTypeTag ?labelTypeTagLabel ?wikidata_ref ?wikidata_refLabel ?wikidata_desc
order by desc(?labelTypeTag)

Try it!


Examples without assigned sense

This SPARQL query shows usage examples in this dictionary which are not linked to one of the senses of the entry, along with the candidate senses' IDs and German glosses. This data could be used to prompt a user for manual assignment of the correct sense.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Usage examples (P213) whose statement has no sense-assignment qualifier
# (P211), together with all German-glossed senses of the entry as candidates.
select distinct ?entry ?lemma (lang(?lemma) as ?lang) ?pos ?posLabel 
  (group_concat(distinct concat(strafter(str(?sense),str(lwb:)),": ",?sense_trans); SEPARATOR=" | ") as ?german_candidate_senses)
  ?expl ?expl_trans

where { 
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory ?pos.
  # example statements lacking a P211 ("assigned sense") qualifier
  ?entry lp:P213 ?expl_st. ?expl_st lps:P213 ?expl. filter not exists {?expl_st lpq:P211 ?subject_sense.}
  optional {?expl_st lpq:P210 ?expl_trans.} # P210: example translation
  # candidate senses: every sense of the entry that has a German gloss
  ?entry ontolex:sense ?sense. ?sense skos:definition ?sense_trans. filter(lang(?sense_trans) = "de")
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  } group by ?entry ?lemma ?pos ?posLabel ?german_candidate_senses ?expl ?expl_trans
order by lcase(?lemma)

Try it!


Entries with undefined POS

This SPARQL query shows entries where the dmlex conversion script has assigned the value Q108 'undefined' as POS, because there was no POS specified in the source.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Content statistics restricted to entries whose lexical category is
# lwb:Q108 ("undefined" POS, assigned when the source had no POS).
select distinct ?entry ?lemma (lang(?lemma) as ?lang)
  (group_concat(distinct concat(?label," (",?labeltype,")"); SEPARATOR=" | ") as ?otherLabels)
  (count(distinct ?pron) as ?pronunciations)
  (count(distinct ?sense) as ?senses)
  (count(distinct ?def) as ?definitions)
  (count(distinct ?trans) as ?translations)
  (count(distinct ?expl) as ?examples)

where { 
  # only entries with undefined POS (Q108)
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory lwb:Q108.
  optional {?entry ldp:P203 ?dmlex_label. ?dmlex_label rdfs:label ?label. filter(lang(?label)="en")
            ?dmlex_label ldp:P191 [rdfs:label ?labeltype]. filter(lang(?labeltype)="en")}
  optional {?entry ontolex:sense ?sense.
      optional {?sense ldp:P218 ?def.}   # P218: definition
      optional {?sense ldp:P217 ?trans.} # P217: headword translation
           }
  optional {{?entry ldp:P204 ?pron.} union {?entry ontolex:sense [ldp:P204 ?pron].}}
  optional {{?entry ldp:P213 ?expl.} union {?entry ontolex:sense [ldp:P213 ?expl].}}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  # fixed typo: GROUP BY previously listed "?pronounciations", which never
  # matched the projected ?pronunciations alias
  } group by ?entry ?lemma ?otherLabels ?pronunciations ?senses ?definitions ?translations ?examples
order by lcase(?lemma)

Try it!

Declension markers and entries with undefined POS

This SPARQL query shows a list of declension forms (present in the data as lexeme forms of type "declension"), and the entries without assigned POS to which the distinct declensions belong. This can be used to assign POS.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>
PREFIX lpr: <https://lexbib.elex.is/prop/reference/>
PREFIX lno: <https://lexbib.elex.is/prop/novalue/>

# Distinct declension markers (form-type Q34365 via P190) with the lemmas
# of undefined-POS entries carrying each marker and the German glosses of
# their senses — an aid for manual POS assignment.
select distinct ?declension (count(?lemma) as ?num_of_entries) 
(group_concat(str(?lemma); SEPARATOR=" | ") as ?lemmata)  
(group_concat(str(?german_sense_trans); SEPARATOR=" | ") as ?german)   

where { 
  # P190: form tag; Q34365: the "declension" inflected-form tag
  ?form ldp:P190 lwb:Q34365; ontolex:representation ?declension.
  # inner select: forms of entries with undefined POS, plus German glosses
  {select ?entry ?lemma ?form ?german_sense_trans where {
  ?entry ldp:P5 lwb:Q111; ldp:P207 lwb:Q34170; wikibase:lemma ?lemma; wikibase:lexicalCategory lwb:Q108. # category Q108: undefined pos.
  ?entry ontolex:sense [skos:definition ?german_sense_trans].
  ?entry ontolex:lexicalForm ?form. }
  }
# SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  } group by ?declension ?num_of_entries ?lemmata ?german
order by ?num_of_entries

Try it!


Relations in this dictionary

See all relations and their relation types using this query.

PREFIX lwb: <https://lexbib.elex.is/entity/>
PREFIX ldp: <https://lexbib.elex.is/prop/direct/>
PREFIX lp: <https://lexbib.elex.is/prop/>
PREFIX lps: <https://lexbib.elex.is/prop/statement/>
PREFIX lpq: <https://lexbib.elex.is/prop/qualifier/>

# All entry-to-entry relations (P214) in this dictionary, with the dmlex
# source IDs of origin and target (P186 qualifier on the P5 statement)
# and the relation-type label (P223 qualifier on the P214 statement).
select distinct ?origin ?originId ?originLemma ?relationType ?target ?targetId ?targetLemma
where {?origin lp:P5 [lps:P5 lwb:Q111; lpq:P186 ?originId]; ldp:P207 lwb:Q34170; lp:P214 [lps:P214 ?target; lpq:P223 [rdfs:label ?relationType]]; wikibase:lemma ?originLemma. 
       ?target lp:P5 [lps:P5 lwb:Q111; lpq:P186 ?targetId]; wikibase:lemma ?targetLemma. 
      }
order by lcase(?originLemma)

Try it!


Source data examples

Example entries in the dmlex XML source:

črtica, noun

This entry on Wikibase is L7825.

<entry id="SLDE-V_007293_črtica_1">
    <headword>črtica</headword>
    <partOfSpeech tag="samostalnik" />
    <label tag="ženski" />
    <pronunciation>
      <transcription scheme="stress-position">
        <text>č[r]tica</text>
      </transcription>
    </pronunciation>
    <inflectedForm tag="core-boundary">
      <text>črtic|a</text>
    </inflectedForm>
    <inflectedForm tag="declension">
      <text>-e</text>
    </inflectedForm>
    <sense id="SLDE-V_007293_črtica_1-1">
      <definition definitionType="indicator">
        <text>merilna</text>
      </definition>
      <example sourceIdentity="1">
        <text>
          <headwordMarker>črtica</headwordMarker>
        </text>
        <exampleTranslation>
          <text>Strich</text>
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>Strich</text>
        <label tag="der" />
      </headwordTranslation>
      <headwordTranslation>
        <text>Teilstrich</text>
        <label tag="der" />
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007293_črtica_1-2">
      <definition definitionType="indicator">
        <text>ločilo</text>
      </definition>
      <definition definitionType="synonym">
        <text>vezaj</text>
      </definition>
      <definition definitionType="synonym">
        <text>deljaj</text>
      </definition>
      <example sourceIdentity="1">
        <text>
          <headwordMarker>črtica</headwordMarker>
        </text>
        <exampleTranslation>
          <text>Strich</text>
        </exampleTranslation>
      </example>
      <example sourceIdentity="2">
        <text>
          <headwordMarker>črtica</headwordMarker>
        </text>
        <exampleTranslation>
          <text>Bindestrich</text>
        </exampleTranslation>
      </example>
      <example sourceIdentity="3">
        <text>
          <headwordMarker>črtica</headwordMarker>
        </text>
        <exampleTranslation>
          <text>Trennungsstrich</text>
        </exampleTranslation>
      </example>
      <example sourceElaboration="compound">
        <text>dvojna črtica</text>
        <exampleTranslation>
          <text>doppelter</text>
        </exampleTranslation>
      </example>
      <example sourceElaboration="compound">
        <text>enojna črtica</text>
        <exampleTranslation>
          <text>Bindestrich</text>
          <label tag="der" />
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>Strich</text>
        <label tag="der" />
      </headwordTranslation>
      <headwordTranslation>
        <text>Bindestrich</text>
        <label tag="der" />
      </headwordTranslation>
      <headwordTranslation>
        <text>Trennungsstrich</text>
        <label tag="der" />
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007293_črtica_1-3">
      <definition definitionType="synonym">
        <text>pomišljaj</text>
      </definition>
      <headwordTranslation>
        <text>Gedankenstrich</text>
        <label tag="der" />
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007293_črtica_1-4">
      <headwordTranslation>
        <text>Strichelchen</text>
        <label tag="das" />
      </headwordTranslation>
    </sense>
  </entry>

črtati, verb

This entry on Wikibase is L7822.

<entry id="SLDE-V_007290_črtati_1">
    <headword>črtati</headword>
    <pronunciation>
      <transcription scheme="stress-position">
        <text>č[r]tati</text>
      </transcription>
    </pronunciation>
    <inflectedForm tag="core-boundary">
      <text>črta|ti</text>
    </inflectedForm>
    <inflectedForm tag="declension">
      <text>-m</text>
    </inflectedForm>
    <sense id="SLDE-V_007290_črtati_1-1">
      <definition definitionType="indicator">
        <text>narediti črte</text>
      </definition>
      <example sourceIdentity="1">
        <text>
          <headwordMarker>črtati</headwordMarker>
        </text>
        <exampleTranslation>
          <text>linieren</text>
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>linieren</text>
      </headwordTranslation>
      <headwordTranslation>
        <text>Streifen machen</text>
      </headwordTranslation>
      <headwordTranslation>
        <text>Streifen anbringen</text>
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007290_črtati_1-2">
      <definition definitionType="synonym">
        <text>brisati, izbrisati</text>
      </definition>
      <example sourceIdentity="1">
        <text>
          <headwordMarker>črtati</headwordMarker>
        </text>
        <exampleTranslation>
          <text>streichen</text>
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>streichen</text>
      </headwordTranslation>
      <headwordTranslation>
        <text>ausstreichen</text>
      </headwordTranslation>
      <headwordTranslation>
        <text>durchstreichen</text>
      </headwordTranslation>
      <headwordTranslation>
        <text>den Rotstift ansetzen</text>
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007290_črtati_1-3">
      <definition definitionType="indicator">
        <text>zapis na trakovih ipd.</text>
      </definition>
      <headwordTranslation>
        <text>löschen</text>
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007290_črtati_1-4">
      <definition definitionType="indicator">
        <text>napake ipd.</text>
      </definition>
      <example sourceElaboration="compound">
        <text>neustrezno/nepotrebno črtati</text>
        <exampleTranslation>
          <text>Nichtzutreffendes streichen</text>
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>ausmerzen</text>
      </headwordTranslation>
    </sense>
    <sense id="SLDE-V_007290_črtati_1-5">
      <example sourceElaboration="compound">
        <text>črtati se</text>
        <exampleTranslation>
          <text>sich abheben, sich abzeichnen</text>
        </exampleTranslation>
      </example>
      <example>
        <text>ostro se črtati</text>
        <exampleTranslation>
          <text>in scharfen Konturen</text>
        </exampleTranslation>
      </example>
    </sense>
    <sense id="SLDE-V_007290_črtati_1-6">
      <definition definitionType="indicator">
        <text>skrb</text>
      </definition>
      <example>
        <text>črtati komu na obrazu</text>
        <exampleTranslation>
          <text>in seinem Gesicht</text>
        </exampleTranslation>
      </example>
      <headwordTranslation>
        <text>sich abzeichnen</text>
      </headwordTranslation>
    </sense>
  </entry>

Python script for conversion and upload

This is the version of the script used for this dictionary:

import json, re, validators
import time
# bugfix: xml.etree.cElementTree was deprecated since Python 3.3 and removed
# in Python 3.9; plain ElementTree transparently uses the C accelerator where
# available, so this is a drop-in replacement on all supported versions
from xml.etree import ElementTree as ET
import sys, requests
from bots import xwbi # this contains the wikibaseintegrator for python

# dmlex XML source file, and the JSON file that caches conversion state
# ("control data": source-ID -> Wikibase-ID mappings, language info, etc.)
source_file = "SLDE-V_DMlex_3.0.xml"
mappingfile = f'dmlex_mappings/{source_file.replace(".xml", ".json")}'

tree = ET.ElementTree(file='dmlex_source/'+source_file)
lexicographic_resource = tree.getroot()
print(f"\nSuccessfully loaded XML source: {source_file}")

# functions

def get_mapping(dict_qid): # this gets existing wikibase entry and sense ids and their corresponding dmlex source ID
    """Fetch {dmlex source ID: Wikibase entity ID} for dictionary `dict_qid`.

    The URL-encoded SPARQL query selects, for every entry of the dictionary
    (P207), the source ID stored as a P186 qualifier on the P5 "instance of"
    statement — for entries (Q111) and their senses (Q112) alike.
    """
    r = requests.get(
        f"https://lexbib.elex.is/query/sparql?format=json&query=PREFIX%20lwb%3A%20%3Chttps%3A%2F%2Flexbib.elex.is%2Fentity%2F%3E%0APREFIX%20ldp%3A%20%3Chttps%3A%2F%2Flexbib.elex.is%2Fprop%2Fdirect%2F%3E%0APREFIX%20lp%3A%20%3Chttps%3A%2F%2Flexbib.elex.is%2Fprop%2F%3E%0APREFIX%20lps%3A%20%3Chttps%3A%2F%2Flexbib.elex.is%2Fprop%2Fstatement%2F%3E%0APREFIX%20lpq%3A%20%3Chttps%3A%2F%2Flexbib.elex.is%2Fprop%2Fqualifier%2F%3E%0A%0Aselect%20%3Fsource_id%20%3Flid%0A%0Awhere%20%7B%0A%20%20%3Fentry%20ldp%3AP207%20lwb%3A{dict_qid}.%0A%20%20%7B%3Fentry%20lp%3AP5%20%5Blps%3AP5%20lwb%3AQ111%3B%20lpq%3AP186%20%3Fsource_id%5D.%20bind%20(strafter(str(%3Fentry)%2Cstr(lwb%3A))%20as%20%3Flid)%7D%20%0A%20%20union%20%0A%20%20%7B%3Fentry%20ontolex%3Asense%20%3Fsense.%20%3Fsense%20lp%3AP5%20%5Blps%3AP5%20lwb%3AQ112%3B%20lpq%3AP186%20%3Fsource_id%5D.%20bind%20(strafter(str(%3Fsense)%2Cstr(lwb%3A))%20as%20%3Flid)%7D%0A%20%7D")
    entries = r.json()['results']['bindings']
    mapping = {}
    for entry in entries:
        # a source ID should map to exactly one Wikibase entity; duplicates
        # are reported (the later binding wins) and the pause gives the
        # operator a chance to notice the message in the console output
        if entry['source_id']['value'] in mapping:
            print(f"Error: duplicate source ID {entry['source_id']['value']} - https://lexbib.elex.is/entity/{mapping[entry['source_id']['value']]}")
            time.sleep(1)
        mapping[entry['source_id']['value']] = entry['lid']['value']
    return mapping

def dump_controldata(controldata):
    # Persist the current control data (ID mappings, cached language info,
    # structure survey, ...) to the JSON mapping file so later runs resume.
    with open(mappingfile, "w", encoding="utf-8") as outfile:
        outfile.write(json.dumps(controldata, indent=2))

def get_langdata(ietf_code):
    """Resolve an IETF language code to wiki language code and Wikibase item.

    Queries the LexBib Wikibase for the item whose P185 matches `ietf_code`,
    returning its P43 wiki language code and its item ID with the entity
    namespace prefix stripped.
    NOTE(review): assumes at least one result row — an unknown code raises
    IndexError on results[0]; confirm whether that is the intended failure.
    """
    print(f"Getting wiki code and wikibase item for language {ietf_code} (IETF) from LexBib Wikibase...")
    query = "select ?iso_639_3 ?langCode_wiki ?langCode_item where { "
    query += f'?langCode_item xdp:P32 ?iso_639_3; xdp:P43 ?langCode_wiki; xdp:P185 "{ietf_code}".'
    query += "}"

    results = \
    xwbi.wbi_helpers.execute_sparql_query(query=query, prefix=xwbi.config['mapping']['wikibase_sparql_prefixes'],
                                          endpoint=xwbi.config['mapping']['wikibase_sparql_endpoint'])['results'][
        'bindings']
    print(results)
    return {'langCode_wiki': results[0]['langCode_wiki']['value'],
            'langCode_item': results[0]['langCode_item']['value'].replace(xwbi.config['mapping']['wikibase_entity_ns'], "")}

def get_label_claim(label_element):
    """Convert a dmlex <label tag="..."/> element into a P203 item claim.

    The tag must be a known controlled value (controldata['labelTag']);
    a missing 'tag' attribute or an unknown tag aborts the whole script.
    The original tag string is preserved as a P197 qualifier.
    """
    if 'tag' not in label_element.attrib:
        print(f"Fatal error: There is no 'tag' attrib in this element.")
        sys.exit()
    label = label_element.attrib['tag']
    if label not in controldata['labelTag']:
        print(f"Fatal error: The label '{label}' is not a defined controlled value.")
        sys.exit()
    label_qid = controldata['labelTag'][label]
    print(f"Adding label '{label}' as '{label_qid}'.")
    return xwbi.Item(prop_nr="P203", value=label_qid, qualifiers=[xwbi.String(prop_nr="P197", value=label)])  # dmlex label item / tag in source

def get_pronunciation_claim(pronunciation_element):
    """Build a P204 "pronunciation" string claim from a <pronunciation> element.

    An unknown transcription scheme tag aborts the script; a known one is
    attached as a P194 qualifier.
    """
    # !! This takes the text from the first transcription element. In other words,
    # !! <pronunciation><transcription> is conflated to one (P204 "pronunciation", string).
    # TODO: What to do if several transcription elements in one pronunciation element
    for transcription_element in pronunciation_element.findall('transcription'):
        pron_text = None
        pron_scheme_qualifiers = []
        if 'scheme' in transcription_element.attrib:
            pron_scheme = transcription_element.attrib['scheme']
            if pron_scheme not in controldata['transcriptionSchemeTag']:
                print(
                    f"Fatal error: The transcription scheme tag '{pron_scheme}' is not a defined controlled value.")
                sys.exit()
            # bugfix: prop_nr was "194" — every other property reference in
            # this script uses the "P"-prefixed form ("P204", "P203", ...)
            pron_scheme_qualifiers = [xwbi.Item(prop_nr="P194", value=controldata['transcriptionSchemeTag'][
                pron_scheme])]  # transcription scheme tag
        for pron_text_element in transcription_element.findall('text'):
            pron_text = pron_text_element.text.strip()
        # NOTE(review): pron_text stays None when the transcription has no
        # <text> child — confirm xwbi.String tolerates a None value
        return xwbi.String(prop_nr="P204", value=pron_text, qualifiers=pron_scheme_qualifiers)  # dmlex pronunciation

def write_relation(origin=None, target=None, prop=None):
    """Write a relation claim `prop` from lexeme `origin` to lexeme `target`.

    Only P214 ("relation to entry", lexeme-to-lexeme) is implemented; any
    other property aborts the script.
    """
    if prop == "P214": # relation to entry
        claim = xwbi.Lexeme(prop_nr="P214", value=target)
        subject_lexeme = xwbi.wbi.lexeme.get(entity_id=origin)
        subject_lexeme.claims.add(claim)
        subject_lexeme.write()
        # NOTE(review): relation_count is expected to be a module-level
        # counter maintained by the calling loop — confirm it is defined
        # before the first call, otherwise this print raises NameError
        print(
            f"[{relation_count}] Written relation to https://lexbib.elex.is/entity/{origin} - {prop} - {target}")
        time.sleep(1)
        # with open(relations_mappingfile, 'a') as file:
        #     file.write(json.dumps({
        #         "subject": member_roles["reference-origin"],
        #         "prop": relation_prop,
        #         "type": relation_type,
        #         "object": member_roles["reference-target"]
        #     })+"\n")
    else:
        # bugfix: this branch referenced the undefined name `relation_prop`,
        # which would raise NameError instead of printing the intended
        # message; the parameter is `prop`
        print(
            f"Fatal error: relation prop not implemented in script: {prop}") # TODO: relations that connect two senses or point from entry to sense
        sys.exit()



# main part of the script

# load cached control data from the mapping file; on the first run (file
# missing) or unreadable/corrupt JSON, start fresh from the XML root's
# title and uri attributes
try:
    with open(mappingfile, "r", encoding="utf-8") as metafile:
        controldata = json.load(metafile)
except (OSError, json.JSONDecodeError):
    # bugfix: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; only file-access and JSON-parse errors mean
    # "no usable cache"
    controldata = {'title': lexicographic_resource.attrib['title'], 'uri': lexicographic_resource.get('uri')}
    if not validators.url(controldata['uri']):
        controldata['uri'] = None

print(controldata)

# define dictionary source language

# reuse cached language info if a previous run stored it in the mapping file
if 'langCode_wiki' in controldata and 'langCode_item' in controldata:
    pass
else: # get language data from Wikibase
    if 'langCode' not in lexicographic_resource.attrib:
        print("Fatal error: 'langCode' attribute missing in <lexicographicResource>.")
        sys.exit()
    lang_data = get_langdata(lexicographic_resource.attrib['langCode'])
    controldata['langCode_wiki'] = lang_data['langCode_wiki']
    controldata['langCode_item'] = lang_data['langCode_item']
    dump_controldata(controldata)

langCode_wiki = controldata['langCode_wiki']
langCode_item = controldata['langCode_item']

# define dictionary translation language
# contrary to the dmlex docs, this assumes that the translation language "listing order" is implied in the order of the elements (the "listingOrder" attrib is not there in the dmlex xml source datasets)

if 'trans_langCodes' in controldata:
    pass
else:
    controldata['trans_langCodes'] = []
    # resolve each <translationLanguage langCode="..."> via the Wikibase
    for trans_lang in lexicographic_resource.findall('translationLanguage'):
        if 'langCode' in trans_lang.attrib:
            trans_langCode = trans_lang.attrib['langCode']
            lang_data = get_langdata(trans_langCode)
            controldata['trans_langCodes'].append(
                {'langCode_wiki':lang_data['langCode_wiki'],
                 'langCode_item':lang_data['langCode_item']}
            )

# define dictionary item on Wikibase

if "dictionary_item" in controldata:
    dict_qid = controldata['dictionary_item'] # dict already exists on Wikibase
else: # create item describing dictionary
    # label in the source language, plus an English label as fallback
    labels =[{'lang': langCode_wiki, 'value': controldata['title']}]
    if langCode_wiki != "en":
        labels.append({'lang': 'en', 'value': controldata['title']})
    statements = [
        {'type': 'item', 'prop_nr': 'P5', 'value': 'Q100'}, # instance of dmlex lexicographical resource
        {'type': 'monolingualtext', 'prop_nr': 'P6', 'value': controldata['title'], 'lang':langCode_wiki},
        {'type': 'item', 'prop_nr': 'P150', 'value': controldata['langCode_item']}
    ]
    # one P134 statement per translation language, with its 1-based listing
    # position attached as a P33 qualifier
    trans_lang_listpos = 0
    for trans_lang in controldata['trans_langCodes']:
        trans_lang_listpos += 1
        statements.append({'type': 'item', 'prop_nr': 'P134', 'value': trans_lang['langCode_item'],
                           'qualifiers':[{'type': 'string', 'prop_nr': 'P33', 'value': str(trans_lang_listpos)}]})
    if controldata['uri']:
        statements.append({'type': 'url', 'prop_nr': 'P112', 'value': controldata['uri']})
    itemdata = {'qid': False, 'statements': statements, 'labels': labels}
    dict_qid = xwbi.itemwrite(itemdata)
    print(f"Item describing dictionary has been created: https://lexbib.elex.is/entity/{dict_qid}")
    # persist the new QID immediately so a later crash does not recreate it
    controldata['dictionary_item'] = dict_qid
    dump_controldata(controldata)
    time.sleep(3)

# process controlled values

# maps each controlled-value group name (also the XML element name) to the
# QID used as 'instance of' (P5) on the items describing its tags
controlled_value_groups = {
    'labelTypeTag':'Q104',
    'labelTag':'Q103',
    'definitionTypeTag': 'Q101',
    'partOfSpeechTag':'Q105',
    'transcriptionSchemeTag':'Q107',
    'inflectedFormTag':'Q102'
  #  'sourceIdentityTag': 'Q106' # this in the data is not used for a controlled value and is treated as literal
} # values for 'instance of' in items describing tags

for cv in controlled_value_groups:
    if cv not in controldata:
        controldata[cv] = {}
    # controlled values are direct children of the XML root, named after
    # their group, carrying the value itself in the 'tag' attribute
    for tagname in lexicographic_resource.findall(cv):
        val = tagname.attrib['tag']
        if 'typeTag' in tagname.attrib:
            tag_type = tagname.attrib['typeTag']
            # NOTE(review): relies on 'labelTypeTag' (first key above) being
            # processed before any group whose values reference a typeTag
            if tag_type not in controldata['labelTypeTag']:
                print(f"Fatal error: Tag type '{tag_type}' is unknown.")
                sys.exit()
        else:
            tag_type = None
        descriptions = []
        for description_element in tagname.findall('description'):
            descriptions.append({'lang': 'en', 'value': description_element.text.strip()})
        print(f"\nWill check value '{val}' of controlled value group '{cv}'...")
        if val in controldata[cv]:
            print(f"Wikibase item for {val} ({cv}) is already there: {controldata[cv][val]}")
        else:
            print(f"Need to create Wikibase item for {val} ({cv})...")
            # label in the source language, plus an English label as fallback
            labels = [{'lang': langCode_wiki, 'value': val}]
            if langCode_wiki != "en":
                labels.append({'lang': 'en', 'value': val})
            statements = [
                {'type': 'item', 'prop_nr': 'P5', 'value': controlled_value_groups[cv]},  # instance of (cv group)
                {'type': 'item', 'prop_nr': 'P207', 'value': dict_qid},
                {'type': 'string', 'prop_nr': 'P197', 'value': val, 'qualifiers':[
                    {'type': 'item', 'prop_nr': 'P207', 'value': dict_qid}
                ]}
            ]
            if tag_type:
                statements.append({'type': 'item', 'prop_nr': 'P191', 'value': controldata['labelTypeTag'][tag_type]})
            itemdata = {'qid': False, 'statements':statements, 'labels': labels, 'descriptions':descriptions}
            val_qid = xwbi.itemwrite(itemdata)
            # remember the new QID and persist immediately so a crash later
            # in the run does not lose the mapping
            controldata[cv][val] = val_qid
            dump_controldata(controldata)
            time.sleep(1.5)

# check structure of entries
# one-off survey of which elements/attributes actually occur in the source;
# the result is stored in the mapping file for reference
if 'structure' not in controldata:
    print("\n...Checking XML content...")
    structure = {'lr':{}, 'entry':{}, 'sense':{}}
    for lr_element in lexicographic_resource:
        if lr_element.tag not in structure['lr']:
            structure['lr'][lr_element.tag] = []
        # NOTE(review): because of the elif, attributes are only recorded
        # from the second occurrence of a tag onward — the first
        # occurrence's attributes are skipped (same pattern below for
        # entry and sense elements); confirm this is intentional
        elif lr_element.tag != "entry":
            for attr in lr_element.attrib:
                if attr not in structure['lr'][lr_element.tag]:
                    structure['lr'][lr_element.tag].append(attr)
    for entry_to_check in lexicographic_resource.findall('entry'):
        for element in entry_to_check:
            if element.tag not in structure['entry']:
                structure['entry'][element.tag] = {'attribs':[],'sub_elements':{}}
            else:
                for attr in element.attrib:
                    if attr not in structure['entry'][element.tag]['attribs']:
                        structure['entry'][element.tag]['attribs'].append(attr)
            # count direct sub-elements (except <sense>) per parent, keeping
            # the maximum count observed in any single parent element
            actual_sub_elements = {}
            for sub_element in element:
                if sub_element.tag != "sense" and sub_element.tag not in actual_sub_elements:
                    actual_sub_elements[sub_element.tag] = 1
                elif sub_element.tag != "sense":
                    actual_sub_elements[sub_element.tag] += 1
                for sub_element_tag in actual_sub_elements:
                    if sub_element_tag not in structure['entry'][element.tag]['sub_elements']:
                        structure['entry'][element.tag]['sub_elements'][sub_element_tag] = actual_sub_elements[sub_element_tag]
                    elif actual_sub_elements[sub_element_tag] > structure['entry'][element.tag]['sub_elements'][sub_element_tag]:
                        structure['entry'][element.tag]['sub_elements'][sub_element_tag] = actual_sub_elements[sub_element_tag]
            for sense_element_to_check in entry_to_check.findall('sense'):
                for s_element in sense_element_to_check:
                    if s_element.tag not in structure['sense']:
                        structure['sense'][s_element.tag] = []
                    else:
                        for attr in s_element.attrib:
                            if attr not in structure['sense'][s_element.tag]:
                                structure['sense'][s_element.tag].append(attr)

    controldata['structure'] = structure
    dump_controldata(controldata)

# relation types
# Make sure every relation type declared in the source XML exists as a
# Wikibase item; cache {relation_type: {'rel_prop', 'rel_qid'}} in controldata.
controldata.setdefault('relation_types', {})
for rel_type_element in lexicographic_resource.findall('relationType'):
    relation_type = rel_type_element.attrib['type']
    print(f"Will check relation type '{relation_type}'...")
    if relation_type in controldata['relation_types']:
        print(f"Relation type '{relation_type}' is already there as '{controldata['relation_types'][relation_type]}'.")
        continue
    labels = [{'lang': 'en', 'value': relation_type}]
    descriptions = []
    for desc_element in rel_type_element.findall('description'):
        # only the last description element is kept
        descriptions = [{'lang': 'en', 'value': desc_element.text.strip()}]
    # defaults: bidirectional relation type between entries, linked via P214
    rel_class = "Q109"
    rel_prop = "P214"
    rel_target_type = "entry"
    for member_element in rel_type_element.findall('memberType'):
        if member_element.attrib.get('role') == "reference_target":
            rel_class = "Q110"  # unidirectional relation type
            rel_target_type = member_element.attrib['type']
            if rel_target_type == "sense":
                rel_prop = "P215"  # target is a sense (not an entry)
            break

    statements = [
        {'type': 'item', 'prop_nr': 'P5', 'value': rel_class},  # instance of dmlex relation type (unidirectional or bidirectional)
        {'type': 'property', 'prop_nr': 'P216', 'value': rel_prop},  # prop to use, depending on range Lexeme or Sense
        {'type': 'item', 'prop_nr': 'P207', 'value': dict_qid},
        {'type': 'string', 'prop_nr': 'P197', 'value': relation_type, 'qualifiers': [
            {'type': 'item', 'prop_nr': 'P207', 'value': dict_qid}
        ]}
    ]
    rel_qid = xwbi.itemwrite({'qid': False, 'statements': statements, 'labels': labels, 'descriptions': descriptions})
    time.sleep(0.5)
    controldata['relation_types'][relation_type] = {'rel_prop': rel_prop, 'rel_qid': rel_qid}
    dump_controldata(controldata)


# process entries
# For every <entry> in the source XML, create a Wikibase Lexeme carrying the
# headword, labels, pronunciations, inflected forms, senses (with definitions
# and headword translations) and examples; record source-id -> wikibase-id
# mappings as we go.
entry_count = 0
source_mapping = get_mapping(dict_qid)
input(f"\n\nLoaded {len(source_mapping)} known source_id to wikibase mappings. Press ENTER to start to process entries.")
for entry in lexicographic_resource.findall('entry'):
    entry_count += 1
    if 'id' not in entry.attrib:
        print(f"Fatal error: There is no id attribute in this entry element.")
        sys.exit()
    entry_id = entry.attrib['id']
    if entry_id in source_mapping:
        # already imported in an earlier run; skip
        print(f"\n[{entry_count}] Entry '{entry_id}' is already on Wikibase as {source_mapping[entry_id]}")
        continue
    # NOTE(review): this placeholder dict is overwritten with a plain lexeme id
    # after the first write below — confirm which shape get_mapping() expects.
    source_mapping[entry_id] = {'lid': None, 'senses':{}}
    # part of speech (at most one partOfSpeech element allowed per entry)
    pos_item = "Q108" # pos 'undefined'
    for part_of_speech in entry.findall('partOfSpeech'):
        if pos_item != "Q108": # if POS already has been set as not undefined
            print(f"Fatal error: There is more than one 'partofSpeech' element in this entry.")
            sys.exit()
        if 'tag' in part_of_speech.attrib:
            pos_tag = part_of_speech.attrib['tag']
            if pos_tag not in controldata['partOfSpeechTag']:
                # FIX: message previously referenced the undefined name 'pos'
                # and raised NameError instead of reporting the bad tag
                print(f"Fatal error: POS tag '{pos_tag}' is not a known controlled value.")
                sys.exit()
            pos_item = controldata['partOfSpeechTag'][pos_tag]
    print(f"\n[{entry_count}] Now processing entry '{entry_id}' with POS '{pos_item}'")

    lexeme = xwbi.wbi.lexeme.new(language=langCode_item, lexical_category=pos_item)

    # headword (exactly one expected)
    headword = None
    for headword_element in entry.findall('headword'):
        if headword: # if headword already has been set
            print(f"Fatal error: There is more than one 'headword' element in this entry.")
            sys.exit()
        headword = headword_element.text.strip()
        print(f"Found headword: '{headword}'.")
    lexeme.lemmas.set(language=langCode_wiki, value=headword)
    claim = xwbi.Item(prop_nr="P5", value="Q111", qualifiers=[
        xwbi.String(prop_nr="P186", value=entry_id)  # entry id in dmlex source
    ]) # instance of dmlex Entry
    lexeme.claims.add(claim)
    claim = xwbi.Item(prop_nr="P207", value=dict_qid) # dmlex source dict
    lexeme.claims.add(claim)

    # label at entry level
    for label_element in entry.findall('label'):
        claim = get_label_claim(label_element)
        lexeme.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)

    # pronunciation at entry level
    for pronunciation_element in entry.findall('pronunciation'):
        claim = get_pronunciation_claim(pronunciation_element)
        lexeme.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)
        print(f"Processed one pronunciation element and added data to entry.") # gets transcription text and scheme
        # TODO: possible other sub-elements to 'pronunciation' (other than 'transcription')
        # TODO: possible attributes to 'pronunciation'

    # inflectedForm: one Wikibase Form per inflectedForm element
    for inflected_form_element in entry.findall('inflectedForm'):
        new_form = xwbi.Form()
        inflected_form_rep = None
        for inflected_form_text_element in inflected_form_element.findall('text'):
            if inflected_form_rep:
                print(f"Fatal error: There is more than one 'text' element in this 'inflected form' element.")
                sys.exit()
            inflected_form_rep = inflected_form_text_element.text.strip()
        print(f"+++ Adding new Form with the representation '{inflected_form_rep}'")
        new_form.representations.set(language=langCode_wiki, value=inflected_form_rep)
        # inflected form tag as statement to Form
        if 'tag' in inflected_form_element.attrib:
            if_tag = inflected_form_element.attrib['tag']
            print(f"+++ +++ Inflected form tag is '{if_tag}'")
            if if_tag not in controldata['inflectedFormTag']:
                print(f"Fatal error: The inflected form tag '{if_tag}' is not a defined controlled value.")
                sys.exit()
            if_tag_qid = controldata['inflectedFormTag'][if_tag]
            claim = xwbi.Item(prop_nr="P190", value=if_tag_qid, qualifiers=[xwbi.String(prop_nr="P197", value=if_tag)])
            new_form.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)

        lexeme.forms.add(new_form)
        lexeme.write() # due to bug (adds only the last of the new forms)
        time.sleep(0.50)

    # sense
    for sense_element in entry.findall('sense'):
        if 'id' not in sense_element.attrib:
            sense_id_qualifiers = []
            sense_id = None
        else:
            sense_id = sense_element.attrib['id']
            sense_id_qualifiers = [xwbi.String(prop_nr="P186", value=sense_id)] # sense id in source
        lexeme_sense = xwbi.Sense()
        claim = xwbi.Item(prop_nr="P5", value="Q112", qualifiers=sense_id_qualifiers) # instance of dmlex Sense
        lexeme_sense.claims.add(claim)
        print("*** Adding new sense...")

        # label at sense level
        for label_element in sense_element.findall('label'):
            claim = get_label_claim(label_element)
            lexeme_sense.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)

        # definition
        # (sent to glosses in dict main language, and to a P218 statement) # GLOSS now disabled
        gloss = ""
        definition_text_lang = langCode_wiki  # assumes that it is the main language of the dictionary here
        for definition_element in sense_element.findall('definition'):
            if gloss != "":
                gloss += " | "
            for definition_text_element in definition_element.findall('text'):
                definition_text = definition_text_element.text.strip()
            gloss += definition_text
            definition_type_qualifiers = []
            if 'definitionType' in definition_element.attrib:
                definition_type = definition_element.attrib['definitionType']
                if definition_type not in controldata['definitionTypeTag']:
                    print(
                        f"Fatal error: definitionTypeTag '{definition_type}' in Sense '{sense_id}' is not a known controlloed value.")
                    sys.exit()
                def_type_qid = controldata['definitionTypeTag'][definition_type]
                definition_type_qualifiers = [xwbi.Item(prop_nr="P189", value=def_type_qid)]
            claim = xwbi.MonolingualText(prop_nr="P218", text=definition_text, language=definition_text_lang,
                                         qualifiers=definition_type_qualifiers)
            lexeme_sense.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)
            print(f"Added definition '{definition_text}' for language '{definition_text_lang}'.")
            # lexeme_sense.glosses.set(language=langCode_wiki, value=gloss) # gloss now disabled

        # headword translation
        # (sent to sense glosses and attached as P217 statement)
        headword_trans_lang = controldata['trans_langCodes'][0]['langCode_wiki']  # assumes that the main translation language is the language here
        translated_gloss = ""
        for headword_trans_element in sense_element.findall('headwordTranslation'):
            if translated_gloss != "":
                translated_gloss += " | "
            for headword_trans_text_element in headword_trans_element.findall('text'):
                headword_trans = headword_trans_text_element.text.strip()
                translated_gloss += headword_trans
            headword_trans_label_qualifiers = []
            for label_element in headword_trans_element.findall('label'):
                if 'tag' in label_element.attrib:
                    tag = label_element.attrib['tag']
                    if tag not in controldata['labelTag']:
                        print(f"Fatal error: labelTag '{tag}' in headword translation in Sense '{sense_id}' is not a known controlloed value.")
                        sys.exit()
                    tag_qid = controldata['labelTag'][tag]
                    headword_trans_label_qualifiers.append(xwbi.Item(prop_nr="P203", value=tag_qid))
                    headword_trans_label_qualifiers.append(xwbi.String(prop_nr="P197", value=tag))
            claim = xwbi.MonolingualText(prop_nr="P217", text=headword_trans, language=headword_trans_lang, qualifiers=headword_trans_label_qualifiers)
            lexeme_sense.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)
            print(f"Added headword translation '{headword_trans}' for language '{headword_trans_lang}'.")
        if translated_gloss != "":
            lexeme_sense.glosses.set(language=headword_trans_lang, value=translated_gloss)
        else: # this means that it is a sense without any headword translation
            # Sense is treated as dummy sense (is not created on wikibase)
            with open('dmlex_mappings/dummysenses.txt', 'a') as file:
                file.write(f"{sense_id}\n")
            sense_id = None

        if sense_id:
            lexeme.senses.add(lexeme_sense)

    lexeme.write(clear=False)
    source_mapping[entry_id] = lexeme.id

    # get senses ID mapping after writing (when sense ids are assigned)
    senses_count = 0
    for wikibase_sense in lexeme.senses.get_json():
        senses_count += 1
        sense_id = str(senses_count) # if no proper sense_id is found (because the source has no sense id)
        for p5_claim in wikibase_sense['claims']['P5']: # looks for proper sense id
            if 'P186' in p5_claim['qualifiers']:
                sense_id = p5_claim['qualifiers']['P186'][0]['datavalue']['value']
        # NOTE(review): sense ids are stored flat next to entry ids — assumes
        # the two id spaces never collide; confirm against the source data.
        source_mapping[sense_id] = wikibase_sense['id']

    # examples now, after knowing the sense id mapping
    for sense_element in entry.findall('sense'):
        if 'id' not in sense_element.attrib:
            sense_id = None
            sense_lid = None
        else:
            sense_id = sense_element.attrib['id']
            if sense_id in source_mapping:
                sense_lid = source_mapping[sense_id]
            else:
                sense_lid = None
                print(f"Dummy sense '{sense_id}': sense id {sense_id} not found in mapping data.")
                time.sleep(1)
        for example_element in sense_element.findall('example'):
            print("... Adding new example")
            qualifiers = []
            if sense_lid:
                qualifiers.append(xwbi.SenseClaim(prop_nr="P211", value=sense_lid))
            example_text = None
            example_text_element = None
            for example_text_element in example_element.findall('text'):
                # serialize the element and strip inline markup, keeping plain text
                example_text = re.sub(r'<[^>]+>', '', ET.tostring(example_text_element, encoding="unicode")).strip()

            # example headword and collocate markers
            # FIX: only scan markers when a 'text' element was found; previously
            # an example without a 'text' child raised NameError here (or reused
            # the stale element from the previous example)
            if example_text_element is not None:
                for headword_marker_element in example_text_element.findall('headwordMarker'):
                    qualifiers.append(xwbi.String(prop_nr="P219", value=headword_marker_element.text.strip()))
                for collocate_marker_element in example_text_element.findall('collocateMarker'):
                    qualifiers.append(xwbi.String(prop_nr="P220", value=collocate_marker_element.text.strip()))

            # example translation
            for example_trans_element in example_element.findall('exampleTranslation'):
                for example_trans_text_element in example_trans_element.findall('text'):
                    example_trans_text = example_trans_text_element.text.strip()
                expl_trans_lang = controldata['trans_langCodes'][0]['langCode_wiki']
                # this takes the first listed translation language
                # TODO: ensure that there is only one translation language (and, if not, assign the correct translation language)
                qualifiers.append(xwbi.MonolingualText(prop_nr="P210", text=example_trans_text, language=expl_trans_lang))

            # source identity tag
            if 'sourceIdentity' in example_element.attrib:
                qualifiers.append(xwbi.String(prop_nr="P221", value=example_element.attrib['sourceIdentity']))
            # # source elaboration comment attribute
            # if 'sourceElaboration' in example_element.attrib:
            #     qualifiers.append(xwbi.String(prop_nr="", value=example_element.attrib['sourceElaboration']))

            claim = xwbi.MonolingualText(prop_nr="P213", text=example_text, language=langCode_wiki, qualifiers=qualifiers)
            lexeme.claims.add(claim, action_if_exists=xwbi.ActionIfExists.APPEND_OR_REPLACE)
    lexeme.write()

    print(f"\nFinished processing entry '{entry_id}', now on Wikibase, see it at 'https://lexbib.elex.is/wiki/Lexeme:{lexeme.id}'.")
    time.sleep(1)



# NOTE: the script deliberately terminates here; the relation import below is
# currently not executed.
sys.exit()

# add relations
# For every <relation> element, resolve its member entries/senses via
# source_mapping and write the relation statement(s) to Wikibase.
print(f"\n Starting to add relations...")
relation_count = 0
for relation in lexicographic_resource.findall('relation'):
    relation_count += 1
    relation_type = relation.attrib['type']
    if relation_type not in controldata['relation_types']:
        print(f"Fatal error: relation type {relation_type} is not a known controlled value.")
        sys.exit()
    relation_prop = controldata['relation_types'][relation_type]['rel_prop']
    relation_qid = controldata['relation_types'][relation_type]['rel_qid']
    # members without a role are treated as a bidirectional pair;
    # otherwise roles name the origin and the target explicitly
    member_roles = {"reference-origin": None, "reference-target": None, "bidirectional": []}
    for member in relation.findall('member'):
        role = member.attrib.get('role')
        if role is None:
            member_roles['bidirectional'].append(member.attrib['ref'])
        elif role in member_roles:
            member_roles[role] = member.attrib['ref']
    pair = member_roles['bidirectional']
    if len(pair) == 2:
        # write the relation in both directions
        print(f"[{relation_count}] Will write bidirectional relation to: {pair}")
        write_relation(origin=source_mapping[pair[0]], target=source_mapping[pair[1]], prop=relation_prop)
        write_relation(origin=source_mapping[pair[1]], target=source_mapping[pair[0]], prop=relation_prop)
    elif member_roles["reference-origin"] and member_roles["reference-target"]:
        print(f"[{relation_count}] Will write unidirectional relation to: '{member_roles['reference-origin']}' with target '{member_roles['reference-target']}'")
        write_relation(origin=source_mapping[member_roles['reference-origin']], target=source_mapping[member_roles['reference-target']], prop=relation_prop)
    else:
        print(f"[{relation_count}] Fatal error: relation data not valid: {member_roles}")
        sys.exit()