Commit 0c6520a9 authored by Andreas Wolf's avatar Andreas Wolf

Merge branch '5-configure-solr-indexes' into 'master'

Resolve "Implement indexer for Solr and TYPO3 extension data"

Closes #5

See merge request !24
parents 7cbd2968 4bfda2dd
......@@ -12,4 +12,4 @@ htdocs/index.php
htdocs/typo3conf/ext/*
htdocs/typo3conf/PackageStates.php
!htdocs/typo3conf/ext/ter*
data/etc/solr/
data/etc/solr/server/solr/mycores/t3o/data
{
"initArgs":{"ignoreCase":false},
"managedList":[]}
\ No newline at end of file
<!-- The content of this page will be statically included into the top
of the admin page. Uncomment this as an example to see where the content
will show up.
<hr>
<i>This line will appear before the first table</i>
<tr>
<td colspan="2">
This row will be appended to the end of the first table
</td>
</tr>
<hr>
-->
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
<currencyConfig version="1.0">
  <rates>
    <!-- USD-based rates; snapshot taken from http://www.exchangerate.com/ at 2011-09-27 -->
    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />

    <!-- Direct cross-rates between some common non-USD currencies -->
    <rate from="EUR" to="GBP" rate="0.869914" />
    <rate from="EUR" to="NOK" rate="7.800095" />
    <rate from="GBP" to="NOK" rate="8.966508" />
  </rates>
</currencyConfig>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->
<elevate>
  <!-- Documents listed for the query text "foo bar" -->
  <query text="foo bar">
    <doc id="1"/>
    <doc id="2"/>
    <doc id="3"/>
  </query>

  <!-- Documents listed for the query text "ipod" -->
  <query text="ipod">
    <!-- put the actual ipod at the top -->
    <doc id="MA147LL/A"/>
    <!-- exclude this cable -->
    <doc id="IW-02" exclude="true"/>
  </query>
</elevate>
{
"initArgs": {
"ignoreCase": true
},
"initializedOn": "2014-04-29T23:08:58.000Z",
"managedList": [
"i",
"me",
"my",
"myself",
"we",
"us",
"our",
"ours",
"ourselves",
"you",
"your",
"yours",
"yourself",
"yourselves",
"he",
"him",
"his",
"himself",
"she",
"her",
"hers",
"herself",
"it",
"its",
"itself",
"they",
"them",
"their",
"theirs",
"themselves",
"what",
"which",
"who",
"whom",
"this",
"that",
"these",
"those",
"am",
"is",
"are",
"was",
"were",
"be",
"been",
"being",
"have",
"has",
"had",
"having",
"do",
"does",
"did",
"doing",
"would",
"should",
"could",
"ought",
"i'm",
"you're",
"he's",
"she's",
"it's",
"we're",
"they're",
"i've",
"you've",
"we've",
"they've",
"i'd",
"you'd",
"he'd",
"she'd",
"we'd",
"they'd",
"i'll",
"you'll",
"he'll",
"she'll",
"we'll",
"they'll",
"isn't",
"aren't",
"wasn't",
"weren't",
"hasn't",
"haven't",
"hadn't",
"doesn't",
"don't",
"didn't",
"won't",
"wouldn't",
"shan't",
"shouldn't",
"can't",
"cannot",
"couldn't",
"mustn't",
"let's",
"that's",
"who's",
"what's",
"here's",
"there's",
"when's",
"where's",
"why's",
"how's",
"an",
"the",
"and",
"but",
"if",
"or",
"because",
"as",
"until",
"while",
"of",
"at",
"by",
"for",
"with",
"about",
"against",
"between",
"into",
"through",
"during",
"before",
"after",
"above",
"below",
"to",
"from",
"up",
"down",
"in",
"out",
"on",
"off",
"over",
"under",
"again",
"further",
"then",
"once",
"here",
"there",
"when",
"where",
"why",
"how",
"all",
"any",
"both",
"each",
"few",
"more",
"most",
"other",
"some",
"such",
"no",
"nor",
"not",
"only",
"own",
"same",
"so",
"than",
"too",
"very"
]
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" ?>
<!--
The schema name property is constructed as follows
tx_solr - The extension key
x-y-z - The extension version this schema is meant to work with
YYYYMMDD - The date the schema file was changed the last time
When changing the schema the name property must be updated. There is a
status report - tx_solr_report_SchemaStatus - checking against this
name property, that status check must be updated as well.
-->
<schema name="tx_solr-6-0-0--20161209" version="1.6" >
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.6" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
1.3: removed optional field compress feature
1.4: default auto-phrase (QueryParser feature) to off
1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
1.6: useDocValuesAsStored defaults to true.
-->
<uniqueKey>id</uniqueKey>
<!-- xinclude field types -->
<xi:include href="../general_schema_types.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<!-- xinclude fields-->
<xi:include href="../general_schema_fields.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain; the filters are applied in the order listed. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- Word and number parts are generated and also catenated; the original token is preserved. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="1" preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Synonyms and stop words are read from the managed resources named "english". -->
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- Terms listed in english/protwords.txt are protected from stemming. -->
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

  <!--
    Query-time chain. It differs from the index chain in two places:
    catenateWords and catenateNumbers are 0 here, and there is no
    EnglishPossessiveFilterFactory.
    NOTE(review): confirm that omitting the possessive filter at query time is intentional.
  -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="1" preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
  <!-- A single analyzer element with no type attribute. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- No word/number parts are generated (0); only catenated forms plus the original token. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0" generateNumberParts="0"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Synonyms and stop words are read from the managed resources named "english". -->
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="English" protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Exact matching of words like textWhiteSpaceTokenized,
but with enabled Synonym and Stop Filter
-->
<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100">
  <!-- No word-delimiter, possessive or stemming filters: only lowercasing, synonyms, stop words and de-duplication. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Setup simple analysis for spell checking -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
  <!-- Index-time chain: deliberately has no synonym filter, so that synonyms are not offered as spell suggestions. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: same as above, plus the managed "english" synonym filter after lowercasing. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
</schema>
\ No newline at end of file
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma
<fields>
<!--
Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the
<types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a
given field.
When using MoreLikeThis, fields used for similarity should be
stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
default: a value that should be used if no value is specified
when adding a document.
-->
<field name="_version_" type="long" indexed="true" stored="true"/>
<!--
points to the root document of a block of nested documents.
Required for nested document support
-->
<field name="_root_" type="string" indexed="true" stored="false"/>
<!--
The document id is derived from a site-specific key (hash) and some
record properties like:
$document->id = $siteHash . '/' . $type . '/' . $record['uid'];
-->
<field name="id" type="string" indexed="true" stored="true" required="true" />
<!--
An additional ID used for record collapsing;
typically this will be $type/$record['uid'].
When indexing files, the id field is not generated according to that
scheme, so we need an additional field for collapsing results.
-->
<field name="variantId" type="string" indexed="true" stored="true" />
<!--
Using these fields we can "connect" the indexed documents to
specific sites.
-->
<field name="site" type="string" indexed="true" stored="true" docValues="true" />
<field name="siteHash" type="string" indexed="true" stored="true" docValues="true" />
<!--
The application key which will come in handy as soon as other
systems start sending their content to the same index. Thus you
can search "external" systems' content through TYPO3
-->
<field name="appKey" type="string" indexed="true" stored="false" docValues="true" required="true" />
<!--
The type by default represents the table name of a record. For
external systems this field could be used differently.
-->
<field name="type" type="string" indexed="true" stored="true" docValues="true" required="true" />
<!--
The content hash, better known as cHash for short, is used to store
a hash over the GET parameters used to request a URL and identify a
cache entry for the generated page.
-->
<field name="contentHash" type="string" indexed="false" stored="true" />
<!--
Here, default is used to create a "timestamp" field indicating when
the document was indexed.
-->
<field name="indexed" type="date" indexed="true" stored="true" default="NOW/SECOND" />
<!-- system fields -->
<field name="uid" type="integer" indexed="true" stored="true" />
<field name="pid" type="integer" indexed="true" stored="true" docValues="true" />
<field name="typeNum" type="integer" indexed="true" stored="true" />
<field name="created" type="date" indexed="true" stored="true" docValues="true" />