Skip to content
1 change: 1 addition & 0 deletions configurations/clarin-si/server.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ baseurl = "https://www.clarin.si/ske/#concordance?tab=advanced&queryselector=cql
system_name = "CLARIN.SI noSketch Engine"
corpus_configs = "configurations/clarin-si/corpora"
allow_external_search = true
examples_url = "./slovene_examples.html"

[general]
ud_mode = true
Expand Down
15 changes: 15 additions & 0 deletions src/cqp_tree/web/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@
validation_type=bool,
default_value=False,
),
DeclaredConfig(
key='examples_url',
readable_name='Examples URL',
readable_description='URL of a page with more advanced examples, possibly system-specific.',
validation_type=str,
default_value='https://grew.fr/tutorial/top/',
),
)

TEMPLATE_DIR = Path(__file__).parent / 'static'
Expand Down Expand Up @@ -88,6 +95,10 @@ def external_search():
def about():
return serve_about(config)

@server.route('/slovene_examples.html', methods=['GET'])
def slovene_examples():
    """Handle GET requests for ``/slovene_examples.html`` by serving the 'slovene' examples page."""
    return serve_examples('slovene')

return server


Expand Down Expand Up @@ -144,6 +155,10 @@ def serve_about(config: Configuration):
return render_template('about.html', cfg=config, version=cqp_tree.VERSION)


def serve_examples(lang: str):
    """Render the examples page for the given language.

    The template name is built as ``<lang>_examples.html`` and passed to
    ``render_template``; its result is returned unchanged.
    """
    # The f-string is already fully interpolated. Calling ``.format()`` on the
    # result was redundant and would re-interpret (or raise on) any braces
    # contained in ``lang``.
    return render_template(f'{lang}_examples.html')


def serve_external_search():
if 'url' not in request.args or 'query' not in request.args:
return bad_request('Missing required query parameters "url" or "query".')
Expand Down
76 changes: 67 additions & 9 deletions src/cqp_tree/web/static/about.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,17 @@
</head>

<body>
<script src="https://cdn.jsdelivr.net/npm/clipboard@2/dist/clipboard.min.js"></script>
<div class="container">
<div class="row justify-content-center min-vh-100">
<div class="col-md-10 col-lg-8 flex-grow-1">
<div>

<h1 class="my-5">CQP/Tree</h1>
<h2>Convert syntactic tree queries into CQL</h2>
<h2>What is CQP/Tree?</h2>

<p>
CQP/Tree converts readable syntactic tree queries into CQL for use in Sketch Engine, Korp, Corpus
CQP/Tree converts human-readable syntactic tree queries into CQL for use in Sketch Engine, Korp, Corpus
Workbench and related corpus search systems. This installation is primarily intended to support
syntactic searching in
{% if cfg.homepage %}
Expand All @@ -38,12 +39,46 @@ <h2>Convert syntactic tree queries into CQL</h2>
Workbench-compatible corpora and platforms.
</p>

<h2>Getting started</h2>
<p>
If you are new to syntactic tree querying, you may want to start with the example queries below or
consult the documentation for the supported input languages, such as
<a href="https://orodja.cjvt.si/drevesnik/help/en/">depsearch</a>
or
<a href="https://universal.grew.fr/?corpus=UD_English-ParTUT@2.18">Grew</a>.
CQP/Tree supports three main input languages:
<a href="https://grew.fr/grew_match/help/">Grew-match</a>,
<a href="https://orodja.cjvt.si/drevesnik/help/en/">depsearch</a> and
<a href="https://github.com/GrammaticalFramework/deptreepy">deptreepy</a>.
All three languages prioritize making it easy to search for syntactic relations between tokens.
</p>

<p>
If you wanted to look for <b>ditransitive predicates</b> in a Universal Dependencies treebank, for instance, you could write the Grew query
{% with name="grew", code='''pattern {
A [upos=\"VERB\"]; % there is a token called A whose UPOS tag is VERB
A -[obj]-> B; % A has a dependent, B, which is labelled as its direct object (obj)
A -[iobj]-> C % A also has another dependent, C, labelled as indirect object (iobj)
}''' %}
{% include "snippet.html" %}
{% endwith %}

In depsearch, you could write the same query as

{% with name="depsearch", code="VERB >obj _ >iobj _" %}
{% include "snippet.html" %}
{% endwith %}

whose deptreepy equivalent is

{% with name="deptreepy", code="TREE_ (POS VERB) (DEPREL obj) (DEPREL iobj)" %}
{% include "snippet.html" %}
{% endwith %}
</p>

<p>
CQP/Tree can convert all of these syntaxes to CQL and allows you to run them directly on your corpus of choice.
To get started, try copying and pasting one of the queries above into the
<a href="..">web interface</a>!
</p>

<p>
For some more advanced examples, follow <a href="{{ cfg.examples_url }}">this link</a>.
</p>

<h2>
Expand All @@ -52,8 +87,31 @@ <h2>
<p>
Translated queries may differ from native syntactic search tools in terms of precision and recall,
especially for complex multi-token queries. For details on the translation approach, evaluation and
known limitations, see the
<a href="https://lrec.elra.info/lrec2026-main-914">accompanying paper</a>.
known limitations, see Section 7 of the
<a href="https://doi.org/10.63317/2vfu2ssa33us">accompanying paper</a>.
</p>

<h2>
How to cite
</h2>
<p>
If you use CQP/Tree in your research, you are welcome to cite
<a href="https://doi.org/10.63317/2vfu2ssa33us">Niklas Deworetzki and Arianna Masciolini. <i>Syntactic Sugar for Syntactic Queries: Sequential Representations for Dependency Queries</i>. In Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)</a>:

{% with name="bibtex", code="""@inproceedings{deworetzki-etal-2026-syntactic,
title = {Syntactic Sugar for Syntactic Queries: Sequential Representations for Dependency Queries},
author = {Deworetzki, Niklas and Masciolini, Arianna},
booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation Conference ({LREC} 2026)},
month = {May},
year = {2026},
pages = {11669--11678},
address = {Palma, Mallorca, Spain},
publisher = {European Language Resources Association (ELRA)},
editor = {Piperidis, Stelios and Bel, Núria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
doi = {10.63317/2vfu2ssa33us},
}""" %}
{% include "snippet.html" %}
{% endwith %}
</p>

<h2>
Expand Down
129 changes: 129 additions & 0 deletions src/cqp_tree/web/static/slovene_examples.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
<!doctype html>
<html lang="en">

<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.13.1/font/bootstrap-icons.min.css">
<title>Examples (Slovene)</title>

<style>
p {
text-align: justify;
}
</style>
</head>

<body>
<script src="https://cdn.jsdelivr.net/npm/clipboard@2/dist/clipboard.min.js"></script>
<div class="container">
<div class="row justify-content-center min-vh-100">
<div class="col-md-10 col-lg-8 flex-grow-1">
<h1 class="my-5">Some more examples</h1>
<p>
If you have reached this page, you are probably wondering what kind of patterns you can search for with CQP/Tree.
On this page, we have gathered a few examples in
<a href="https://orodja.cjvt.si/drevesnik/help/en/">depsearch</a> and
<a href="https://grew.fr/grew_match/help/">Grew-match</a>.
You can copy-paste them into the
<a href="..">web interface</a>
and run them on
<a href="https://www.clarin.si/ske/#dashboard?corpname=suk11">SUK 1.1</a>.
<!--<a href="https://www.clarin.si/ske/#dashboard?corpname=gfida">Gigafida 2.2</a>.-->
With some adjustments, you can search for similar structures on any dependency-annotated corpus available on CLARIN.SI noSketch Engine,
or even on other CQL-compatible corpus search systems.
</p>

<h2>Expletives</h2>
<p>
<i>Expletives</i> are nominals that appear in an argument position of a predicate, but do not themselves fill any of the semantic roles of the predicate.
This is a simple depsearch query to search for expletives attached to lexical verbs:
{% with name="expl1-depsearch", code="VERB >expl _" %}
{% include "snippet.html" %}
{% endwith %}
</p>
<p>
or in Grew-match:
{% with name="expl1-grew", code="""pattern { X -[expl]-> Y;
X [ud_pos=VERB];
}""" %}
{% include "snippet.html" %}
{% endwith %}
</p>
<p>
These queries will fetch inherently reflexive verbs, such as <i>smejati se</i> ('to laugh') but also other, non-reflexive expletives, such as <i>piti ga</i> ('to drink it').
If you are only interested in non-reflexive expletives, you can refine the query.
In depsearch:
{% with name="expl2-depsearch", code="VERB >expl PRON&!L=se" %}
{% include "snippet.html" %}
{% endwith %}
</p>
<p>
or in Grew-match:
{% with name="expl2-grew", code="""pattern { X -[expl]-> Y;
X [ud_pos=VERB];
Y [ud_pos=PRON, lemma <> \"se\"];
}""" %}
{% include "snippet.html" %}
{% endwith %}
</p>

<p>
As you can see in the examples above, one of the reasons why the depsearch queries are more concise is that some things are left implicit.
In Grew-match, on the other hand, you always (except for dependency labels) have to specify the names of the annotation layers you want to search (in this case, <b>lemma</b> and <b>ud_pos</b>).
This means that, if you want to run the Grew queries on a different corpus, you may have to modify these names.
For Gigafida 2.2, for instance, <b>ud_pos</b> should be replaced with <b>pos</b>.
To find out what different annotation layers are called in different corpora, select a corpus and click on <i class="bi bi-gear"></i>.
If your corpus of choice is not listed, select the corpus on noSketch Engine and click on "corpus info" to find out.
</p>

<h2>Case errors in negated sentences</h2>
<p>
Let's look at a more complex depsearch query:
{% with name="caserr-depsearch", code="VERB >obj NOUN&Case=Acc >aux|>advmod Polarity=Neg" %}
{% include "snippet.html" %}
{% endwith %}
</p>
<p>
This can be used to search for clauses with negative polarity with an object in accusative rather than genitive case, such as <i>to ne vem</i> ('I don't know this'), where <i>to</i> is incorrectly inflected for case.
</p>
<p>
The Grew-match equivalent is:
{% with name="caserr-grew", code="""pattern {
V [ud_pos = VERB];
N [ud_pos = NOUN, Case = Acc];
P [Polarity = Neg];
V -[obj]-> N;
V -[aux|advmod]-> P;
}""" %}
{% include "snippet.html" %}
{% endwith %}
</p>

<h2>Agents, but not subjects</h2>
<p>
So far, we have only looked at examples that rely exclusively on UD annotations.
However, you can search for any other token-level attributes, as long as you know what the annotation layer is called for the corpus you want to use.
As mentioned above, you can find out by selecting the corpus on noSketch Engine and clicking on "corpus info".
</p>
<p>
For instance, on SUK you can easily look for semantic <i>actants</i> (agents) which are syntactically not expressed as subjects:
{% with name="agents-depsearch", code="srl_dep=\"ACT\" <!nsubj _" %}
{% include "snippet.html" %}
{% endwith %}
</p>

<p>
In Grew-match:
{% with name="agents-grew", code="""pattern { X -[^nsubj]-> Y;
Y [srl_dep=\"ACT\"];
}""" %}
{% include "snippet.html" %}
{% endwith %}
</p>
</div>
</div>
</div>
</body>

</html>

12 changes: 12 additions & 0 deletions src/cqp_tree/web/static/snippet.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{#
  Copyable code snippet partial.

  Reads two variables from the template context:
    name - used as the id of the <pre> element and, prefixed with "copy-",
           as the id of the copy button, so it should be unique on the page.
    code - the text shown inside the <pre> element and targeted by the
           copy button.

  Relies on the global ClipboardJS constructor being available when the
  inline script runs, and uses the "bi bi-clipboard" icon classes for the
  button glyph.
#}
<script>
new ClipboardJS('#copy-{{name}}');
</script>

<div class="border rounded bg-light">
<button type="button" id="copy-{{name}}" class="btn btn-outline-primary float-end"
data-clipboard-target="#{{name}}" title="Copy to clipboard">
<i class="bi bi-clipboard"></i>
</button>
<pre id="{{name}}">
{{code}}</pre>
</div>
4 changes: 1 addition & 3 deletions tests/conll/test_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

CONFIG = cqp_tree.default_configuration()


class TranslationTests(unittest.TestCase):
dir_path = os.path.dirname(os.path.realpath(__file__))

Expand Down Expand Up @@ -218,7 +219,6 @@ def test_anchor_on_first_token(self):
res.constraints,
)


def test_anchor_on_last_token(self):
text = '''
1 _ _ _ _ _ _ _ _ _
Expand All @@ -233,7 +233,6 @@ def test_anchor_on_last_token(self):
res.constraints,
)


def test_subsequent_tokens(self):
text = '''
1 _ _ _ _ _ _ _ _ _
Expand All @@ -248,7 +247,6 @@ def test_subsequent_tokens(self):
res.constraints,
)


def test_ordered_tokens(self):
text = '''
1 _ _ _ _ _ _ _ _ ordered=Yes
Expand Down
2 changes: 1 addition & 1 deletion tests/deptreepy/test_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_true(self):
(q,) = translate_deptreepy('TRUE', CONFIG).queries
self.assertEqual(1, len(q.tokens), 'Query should have one token.')
self.assertEqual(0, len(q.predicates), 'Query should not have any predicates.')
(token, ) = q.tokens
(token,) = q.tokens
self.assertIsNone(token.attributes)

def test_and_predicate(self):
Expand Down
1 change: 0 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,3 @@ def test_names_generates_fresh_names(self):

if index == limit:
break