MOSAIC
The Modular Open Web Search Application based on Index Fractions (MOSAIC) provides access to the Open Web Index through a REST API, built on a modular and configurable system that can be adapted to different purposes and needs. It returns search results from indexed web documents. The code for the REST API can be found in search-service.
Another library used in this package is lucene-ciff [created by @gijshendriksen], which imports a CIFF index into an Apache Lucene index. Find out more about the standalone application here. This component allows users to take a CIFF index and import it into a Lucene index in the lucene folder.
The directory resources contains the index partition data used by lucene-ciff (CIFF index files) and by the search-service (Apache Parquet file(s)). This application already provides two indexes and associated metadata:
- demo-simplewiki: A demo index containing abstracts of Simple English Wikipedia pages (241,839 documents).
- demo-unis-graz: A demo index containing documents related to the University of Graz and Graz University of Technology (1,935 documents).
If you want to use an additional or your own index, include the CIFF index and the associated Parquet file(s) in the directory resources/<YOUR_INDEX_NAME>/. If already available, include the Lucene index in the directory lucene/<YOUR_INDEX_NAME>/; otherwise, you can simply use lucene-ciff or the provided scripts to import the CIFF index into a Lucene index. The name of the directory that contains the Parquet file(s) must match the name of the associated Lucene index in the lucene directory. If it differs, the code has to be adapted.
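As a rough illustration (the index name and file names below are hypothetical), adding a custom index could look like this:
# hypothetical index name "my-index"; file names are illustrative
mkdir -p resources/my-index
cp /path/to/my-index.ciff resources/my-index/
cp /path/to/metadata.parquet resources/my-index/
# optional: if a ready-made Lucene index is available, place it in lucene/my-index/
mkdir -p lucene/my-index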
The front-end directory offers a simple application showcasing how to use the search service. If you want to run the front end locally, it is sufficient to run the search service and either put the front-end application into a directory served by your local web server or run it using Docker.
Usage
After cloning this repository, you can use one of the approaches described below to build and start MOSAIC. MOSAIC currently supports a handful of CLI options.
Option 1: Use scripts
Prerequisites:
- Install Java (JDK 17+)
We provide several scripts with which you can build and launch the application. To first build and then start the application, execute the following commands:
# change the directory
cd scripts
# import indexes, clean the project and create a packaged JAR
./build.sh
# run the executable
./start.sh "[OPTION]" [API_PORT]
If you want to build and run the application on Windows, you can use the batch files instead of the bash scripts.
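For instance, to start the service on port 8009 with the stored plain text limited to 500 characters per document (the values are illustrative), you could run:
./start.sh "-n 500" 8009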
Option 2: Build and run executable manually
Prerequisites:
- Install Java (JDK 17+)
- Install Maven (3.8.2+)
To build a JAR and run the Java executable you just need to execute the following commands:
# change the directory
cd search-service
# clean the project and compile the source code
mvn clean compile
# build the executable
mvn package
# run the executable
java [-Dquarkus.http.port=<API_PORT>] -jar core/target/service.jar [OPTION]
A concrete example for running the executable without a custom port number and without any options is:
java -jar core/target/service.jar
A concrete example for running the executable with a custom port number (e.g., 8009) is:
java -Dquarkus.http.port=8009 -jar core/target/service.jar
Setting the port number using -Dquarkus.http.port=<YOUR_API_PORT> is optional; by default, the search service runs on port 8008. Note that this approach assumes that your Lucene index(es) are already located in the lucene directory and the associated metadata is located in the respective directory in resources. If you need to import your CIFF index into a Lucene index first, you can use the importer script.
Option 3: Dev Mode
Prerequisites:
- Install Java (JDK 17+)
- Install Maven (3.8.2+)
You can run the search service in dev mode, which enables live coding, using:
# change the directory
cd search-service
# run MOSAIC in dev mode
mvn quarkus:dev [-Dquarkus.http.port=<API_PORT>] [-Dquarkus.args="OPTION"]
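A concrete example (the port and option values are illustrative) is:
mvn quarkus:dev -Dquarkus.http.port=8009 -Dquarkus.args="--num-characters 500"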
Option 4: Use Docker
Prerequisites:
- Install Docker
Another option to run the application is to build a Docker image and then launch the application in a Docker container by executing the following commands:
# create an image
docker build -t mosaic .
# start a container
docker run -p 8008:8008 mosaic -p <YOUR_API_PORT>
Note that passing <YOUR_API_PORT> is optional, as it already has a default value in the Dockerfile. Be aware that <YOUR_API_PORT> refers to the port used inside the container. If you want to change the port on your host machine, you need to modify the port binding in the command above. For more information, see the Docker reference.
A concrete example for starting the container without parameters is:
docker run -p 8008:8008 mosaic
A concrete example for starting the container with parameters is:
docker run -p 8008:8008 mosaic -p 8008
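If you want to expose the service on a different port of your host machine (e.g., 9000), only the host side of the port binding changes:
docker run -p 9000:8008 mosaic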
When you build the Docker image, the CIFF file in each corresponding directory in resources is automatically imported into a Lucene index inside the image, so you do not have to import the CIFF files into Lucene indexes manually beforehand.
CLI Options
To enable flexible and simple use and development of MOSAIC, a handful of CLI options are supported when starting the search service:
Options:
-l, --lucene-dir-path <dir> path of directory containing the Lucene index(es)
(default = lucene directory of this repository)
-p, --parquet-dir-path <dir> path of directory containing the Parquet file(s)
(default = resources directory of this repository)
-i, --id-column <col> column that contains the document identifiers
(default = record_id)
-n, --num-characters <num> number of characters selected from the plain text column
to be stored in the associated DB table column
(if this option is not specified, the full plain text is imported)
-d, --db-file-path <dir> path of directory containing the database file (file is
created when starting MOSAIC for the first time)
(default = /tmp/mosaic_db)
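For example, when running the packaged JAR from the search-service directory, several options can be combined (the paths and values are illustrative):
java -jar core/target/service.jar --lucene-dir-path /data/lucene --parquet-dir-path /data/metadata --num-characters 500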
Scripts
Building the application
- Enter the scripts directory: cd scripts
- Import all CIFF files and build the application using the following command: ./build.sh
- The result is a set of imported indexes stored in the lucene directory and a JAR file stored in search-service/core/target/ as service.jar.
Running the application
- Enter the scripts directory: cd scripts
- Run the application using the following command: ./start.sh "[OPTION]" [API_PORT]
- All options and the API_PORT parameter are optional. The API_PORT parameter is set to 8008 by default.
- If you do not need any options but want to use a custom API_PORT, pass "" before the port number (see the example after this list).
- Example script call: ./start.sh "-i id" 8008
- The service runs on port 8008 of your machine by default.
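For example, to start the service on port 8009 without any options (the port value is illustrative), run:
./start.sh "" 8009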
Running the index importer
- Enter the scripts directory: cd scripts
- Run the importer using the following command (see the example after this list): ./import_index.sh <YOUR_CIFF_FILE_NAME> <YOUR_LUCENE_INDEX_NAME>
- Ensure that a CIFF file named <YOUR_CIFF_FILE_NAME> exists in the directory resources/<YOUR_INDEX_NAME>/.
- For the import, the default Lucene codec (i.e., the latest version) is used. Find out more about the SimpleText codec here, which you can use with the standalone lucene-ciff application.
- The imported index is stored in lucene as a directory named <YOUR_LUCENE_INDEX_NAME>.
- IMPORTANT: This script will not run if a directory named <YOUR_LUCENE_INDEX_NAME> already exists in lucene.
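As an illustration (the file and index names are hypothetical; the CIFF file is assumed to reside in resources/my-index/), importing a CIFF index could look like:
./import_index.sh my-index.ciff my-index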
Run MOSAIC in Docker
As an alternative to cloning the repository, you can run MOSAIC using the Docker images available from the GitLab Container Registry.
Use the following Docker command to run MOSAIC with the indexes available in this repository:
docker run \
--rm \
-p 8008:8008 \
opencode.it4i.eu:5050/openwebsearcheu-public/mosaic
If you want to use other indexes, you can start the MOSAIC search service with the options --lucene-dir-path and --parquet-dir-path:
docker run \
--rm \
-p 8008:8008 \
opencode.it4i.eu:5050/openwebsearcheu-public/mosaic/search-service \
--lucene-dir-path /path/to/lucene-indexes/ \
--parquet-dir-path /path/to/metadata-directories/
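Since these paths refer to locations inside the container, host directories typically have to be mounted as volumes. A minimal sketch, assuming host directories /host/lucene-indexes and /host/metadata and illustrative mount points, could be:
docker run \
--rm \
-p 8008:8008 \
-v /host/lucene-indexes:/data/lucene \
-v /host/metadata:/data/parquet \
opencode.it4i.eu:5050/openwebsearcheu-public/mosaic/search-service \
--lucene-dir-path /data/lucene \
--parquet-dir-path /data/parquet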
Service Access
Queries
Open a web browser or use tools like cURL or Postman to send an HTTP GET request to the endpoint /search or /searchxml. Depending on the modules that are enabled, different query parameters can be used. For further information, please check the specification of each module. In general, no query parameter is required, although in most cases q will be used for the query terms.
Depending on the host and port where the application is running, the format of the GET request for the endpoint /search with a response in JSON format could be:
http://localhost:8008/search?q=<query>
To get the response in OpenSearch XML format, use the endpoint /searchxml:
http://localhost:8008/searchxml?q=<query>
A concrete example for a simple GET request only specifying the query with a response in JSON format is:
http://localhost:8008/search?q=graz
The same GET request with a response in OpenSearch XML format is:
http://localhost:8008/searchxml?q=graz
Via the API, MOSAIC returns a response containing a list of search results, where each result is composed of the fields of the enabled modules. If no index name is passed as a parameter, MOSAIC searches all available indexes and returns a list of results for each index.
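For example, using curl and the demo index provided in this repository (the parameter values are illustrative):
curl "http://localhost:8008/search?q=graz&index=demo-unis-graz&limit=5"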
OpenSearch
MOSAIC implements the OpenSearch protocol and provides an OpenSearch description document. The actual document is created at startup and is based on the template.
When the endpoint /searchxml is used instead of /search, the search results are returned in XML format. The XML document structure is:
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">
<title>MOSAIC Search: {searchTerms}</title>
<description>Search results for "{searchTerms}" at MOSAIC Search Service</description>
<author>
<name>OpenWebSearch.eu</name>
</author>
<opensearch:totalResults>1121</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>20</opensearch:itemsPerPage>
<opensearch:Query role="request" searchTerms="{searchTerms}" startPage="1"/>
<link rel="alternate" href="{baseUrl}/search?q={searchTerms}&pw=1&limit=20" type="application/json"/>
<link rel="self" href="{baseUrl}/searchxml?q={searchTerms}&pw=1&limit=20" type="application/atom+xml"/>
<link rel="next" href="{baseUrl}/searchxml?q={searchTerms}&pw=2&limit=20" type="application/atom+xml"/>
<link rel="last" href="{baseUrl}/searchxml?q={searchTerms}&pw=56&limit=20" type="application/atom+xml"/>
<link rel="search" type="application/opensearchdescription+xml" href="{baseUrl}/opensearch.xml"/>
<item>
...
</item>
<item>
...
</item>
...
</feed>
Index Information
MOSAIC provides an additional endpoint /index-info that returns information about the available indexes in JSON format. The endpoint expects no query parameters. For each index, the information includes the name of the index, the number of indexed documents, and a list of languages that appear in the index:
{
"results": [
{
"core-index": {
"documentCount": 285392,
"languages": [
"deu",
"eng",
"est",
"fra",
"ltz",
"pol",
"unknown",
"zho"
]
}
},
...
]
}
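A corresponding request using the default host and port is:
http://localhost:8008/index-info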
Document Full Plain Text
The CLI option -n <number> allows importing only a part of the full plain text of each document into the database. This is particularly beneficial for larger indexes, as it reduces the creation time and file size of the database. To get the full plain text of a web document, MOSAIC provides the endpoint /full-text. The endpoint expects the parameter id. Additionally, the parameter column can be used to specify the metadata column against which the passed web document ID is matched (default: record_id).
Depending on the host and port where the application is running, the format of the GET request for the endpoint /full-text with a response in JSON format could be:
http://localhost:8008/full-text?id=<id>&column=<column>
A concrete example of a GET request to retrieve the full plain text of the document with the ID 0f02f96c-a2da-49c2-9e6b-95e17d95cbf1 is:
http://localhost:8008/full-text?id=0f02f96c-a2da-49c2-9e6b-95e17d95cbf1
Modules
Core
As the only required module, the Core module enables searching in one or multiple index partitions of the Open Web Index. It is the main architectural component, and all other components and modules depend on it.
Query Parameters
Parameter | Value | Necessity | Description |
---|---|---|---|
q | string | Optional | Search term(s) to be searched for in the Lucene index. |
index | string | Optional | Specifies the Lucene index to be searched in. The passed value must match the folder name of the Lucene index. If no index is specified, a separate search is performed in all available indexes. |
lang | string | Optional | Restricts the search results to pages in the specified language (e.g., eng). If no language is specified, the search results are language independent. |
ranking | string | Optional | Specifies the order of the search results based on the number of words a page has. Can be either asc or desc. If no ranking is specified, the order yielded by Lucene's similarity search is used. |
pw | int | Optional | Defines the page number of the set of search results desired by the search client. If no page number is specified, 1 is used. |
limit | int | Optional | Sets the maximum number of results to be returned. If no limit is specified, a maximum of 20 results per page is returned by default. |
fulltext | boolean | Optional | Loads the full plain text dynamically from the Parquet file(s) to generate the text snippet if the query term(s) are not present in the plain text stored in the database. If not specified, the full text is not loaded dynamically. |
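For example, a request combining several of these parameters (the values are illustrative) could look like:
http://localhost:8008/search?q=graz&lang=eng&ranking=desc&pw=2&limit=10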
Response (JSON)
{
"id": "2f3232a3-c4f3-4ae6-990b-290dde685bc7",
"url": "http://info.cern.ch/hypertext/WWW/TheProject.html",
"title": "The World Wide Web Project",
"textSnippet": "The WorldWideWeb (W3) is a wide-area hypermedia information retrieval initiative aiming to give universal access to a large universe of documents.",
"language": "eng",
"warcDate": 1705353588000000,
"wordCount": 129
}
Response (XML)
<item>
<title>The World Wide Web Project</title>
<link>http://info.cern.ch/hypertext/WWW/TheProject.html</link>
<description>The WorldWideWeb (W3) is a wide-area hypermedia information retrieval initiative aiming to give universal access to a large universe of documents.</description>
<id>2f3232a3-c4f3-4ae6-990b-290dde685bc7</id>
<language>eng</language>
<warcDate>1705353588000000</warcDate>
<wordCount>129</wordCount>
<index>core-index</index>
</item>
...
Geo
The Geo module extends the text-based search with geographical information stored in the metadata. This makes it possible to filter for specific areas of the world using bounding boxes.
Query Parameters
Parameter | Value | Necessity | Description |
---|---|---|---|
east | float | Optional | Specifies the max. longitude. |
west | float | Optional | Specifies the min. longitude. |
north | float | Optional | Specifies the max. latitude. |
south | float | Optional | Specifies the min. latitude. |
operator | string | Optional | Specifies whether all locations (i.e., and) or at least one location (i.e., or) of the search result must be inside the bounding box. Default is or. |
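For example, a query restricted to a bounding box roughly covering Austria (the coordinate values are illustrative) could look like:
http://localhost:8008/search?q=graz&west=9.5&east=17.2&south=46.4&north=49.0&operator=or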
Response (JSON)
"locations": [
{
"locationName": "Wien",
"locationEntries": [
{
"latitude": 48.2082,
"longitude": 16.37169,
"alpha2CountryCode": "AT"
},
...
]
},
...
]
Response (XML)
<locations>
<location>
<locationName>Wien</locationName>
<locationEntries>
<locationEntry>
<latitude>48.2082</latitude>
<longitude>16.37169</longitude>
<alpha2CountryCode>AT</alpha2CountryCode>
</locationEntry>
...
</locationEntries>
</location>
...
</locations>
Keywords
The Keywords module adds the possibility to filter by a keyword and includes the extracted keywords in the response of each search result.
Query Parameters
Parameter | Value | Necessity | Description |
---|---|---|---|
keyword | string | Optional | Limits the search results to documents that contain the provided keyword. |
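For example, a query filtering by a keyword (the keyword value is illustrative) could look like:
http://localhost:8008/search?q=graz&keyword=university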
Response (JSON)
"keywords": [
...,
...
]
Response (XML)
<keywords>
<keyword>...</keyword>
...
</keywords>
Adding a new Metadata Module
MOSAIC allows developers to add new modules themselves. Since the framework is based on Maven modules, these are the steps to incorporate a new metadata module:
- Create a new Maven module with <MODULE_NAME> as its name (replace <MODULE_NAME> with the actual name of the module) and search-service as its parent module. By default, the newly created Maven module should have the same folder structure as the existing modules.
- Add a dependency for the shared module in the file pom.xml of the newly created module:
<dependencies>
<dependency>
<groupId>eu.ows.mosaic</groupId>
<artifactId>shared</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
</dependencies>
- Add a dependency in the file pom.xml of the core module:
<dependency>
<groupId>eu.ows.mosaic</groupId>
<artifactId>MODULE_NAME</artifactId>
<version>1.0-SNAPSHOT</version>
<optional>true</optional>
</dependency>
- If not done automatically, register your new module in the file pom.xml of the parent module search-service:
<module>MODULE_NAME</module>
- Create a new Java file in search-service/<MODULE_NAME>/src/main/java/eu/ows/mosaic/ that contains a class which extends MetadataModule. For example, name this Java file and class <MODULE_NAME>Metadata.
- Override methods in the newly created class as you like. In particular, override getMetadataColumns() and getFilterColumns(), which are responsible for retrieving additional metadata columns and defining metadata filter columns, respectively. For more information about these methods, take a look at the abstract class MetadataModule.
- Add an entry in search-service/core/src/main/resources/config.json in the plugins object to enable the module for MOSAIC (a sketch of the resulting layout is shown below).
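As a rough sketch, for a hypothetical module named example, the relevant files would be laid out as follows (the names are illustrative and follow the structure of the existing modules):
# hypothetical module "example"
search-service/example/pom.xml                                           # parent: search-service; depends on the shared module
search-service/example/src/main/java/eu/ows/mosaic/ExampleMetadata.java  # class ExampleMetadata extends MetadataModule
search-service/core/pom.xml                                              # optional dependency on the example module
search-service/core/src/main/resources/config.json                       # entry for the module in the plugins object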
Additional Components
MOSAIC supports additional components to customize web search aspects. Developers can use these provided components without having to touch the code around the search process directly.
Query
The Query component allows developers to further process and modify the query, e.g., with LLMs for query expansion.
Analyzer
The Analyzer component enables developers to use an analyzer other than the StandardAnalyzer that is used by default. Developers can use existing analyzers, but they can also implement their own analyzer.
About
Authors
- Sebastian Gürtl, Graz University of Technology, Graz, Austria (sebastian.guertl@tugraz.at)
- Alexander Nussbaumer, Graz University of Technology, Graz, Austria (alexander.nussbaumer@tugraz.at)
- Rohit Kaushik, University of Waterloo, Ontario, Canada (rohit.kaushik@uwaterloo.ca)
Contact
- Sebastian Gürtl (sebastian.guertl@tugraz.at)
- Alexander Nussbaumer (alexander.nussbaumer@tugraz.at)
Acknowledgement
This software has received funding from the European Union's Horizon Europe research and innovation programme under grant agreement No 101070014 (OpenWebSearch.EU, https://doi.org/10.3030/101070014).