My goal is to add Weaviate support to the pyLoDStorage project. Specifically, I'd like to use the sample data from:
https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py
Which has
- a few records of Persons from the Royal family
- a city list with a few thousand entries
- an artificial list of records with as many records as you wish
as examples.
All data is tabular. Some basic python types like:
- str
- bool
- int
- float
- date
- datetime
need to be supported.
I created the project http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest and a script to run Weaviate via docker compose. There is a Python unit test which used to work with the Weaviate Python client 0.4.1.
I am trying to use the information from https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html to refactor this unit test but don't know how to do it.
What needs to be done to get the CRUD tests running as e.g. in the other three tests: https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests for
- JSON
- SPARQL
- SQL
I am especially interested in the "round-trip" handling of a list of dicts (aka "Table") with the standard data types above. So I'd like to create a list of dicts and then:
- derive the schema automatically by looking at some sample records
- check if the schema already exists and, if so, delete it
- create the schema
- check if the data already exists and, if so, delete it
- add the data and store it
- optionally store the schema for further reference
- restore the data with or without using the schema information
check that the restored data (list of Dicts) is the same as the original data
'''
Created on 2020-07-24

@author: wf
'''
import unittest
import weaviate
import time
#import getpass
class TestWeaviate(unittest.TestCase):
    '''
    Integration tests that talk to a Weaviate instance via the Python client.

    The client calls are kept exactly as in the original test
    (create_schema, create, create_thing, contains_schema, add_reference).
    NOTE(review): these look like the pre-1.x client API - confirm against
    the installed client version before enabling the skipped tests.

    see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    '''

    def setUp(self):
        '''
        set the host and port of the Weaviate instance to test against
        '''
        self.port = 8153
        self.host = "localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080

    def getClient(self):
        '''
        create a Weaviate client for the configured host and port

        Returns:
            the client, which is also kept in self.client
        '''
        self.client = weaviate.Client("http://%s:%d" % (self.host, self.port))
        return self.client

    def tearDown(self):
        '''
        nothing to clean up
        '''
        pass

    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w = self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())

    def testWeaviateSchema(self):
        '''
        create the people schema from the client documentation and add
        two Person entries and one Group entry with fixed uuids

        see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
        '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
        except Exception as ex:
            # best effort: presumably this fails when the schema already
            # exists - the exact exception type is not visible here, so the
            # failure is reported instead of being silently swallowed
            print("schema not created: %s" % ex)
        entries = [
            ({"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"),
            ({"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"),
            ({"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6"),
        ]
        # local names chosen so that the builtins dict and type are not shadowed
        for record, className, uid in entries:
            try:
                w.create(record, className, uid)
            except weaviate.exceptions.ThingAlreadyExistsException:
                # re-running the test against the same instance is fine
                print("%s already created" % record['name'])

    @unittest.skip("disabled by the author (body kept for reference)")
    def testPersons(self):
        '''
        create a minimal Person schema and add four persons

        The original disabled this test with a leading return statement;
        it is skipped explicitly so that the test report shows it.
        '''
        w = self.getClient()
        schema = {
            "actions": {"classes": [], "type": "action"},
            "things": {
                "classes": [{
                    "class": "Person",
                    "description": "A person such as humans or personality known through culture",
                    "properties": [
                        {
                            "cardinality": "atMostOne",
                            "dataType": ["text"],
                            "description": "The name of this person",
                            "name": "name"
                        }
                    ]
                }],
                "type": "thing"
            }
        }
        w.create_schema(schema)
        w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.create_thing({"name": "Alan Turing"}, "Person")
        w.create_thing({"name": "John von Neumann"}, "Person")
        w.create_thing({"name": "Tim Berners-Lee"}, "Person")

    @unittest.skip("disabled by the author (body kept for reference)")
    def testEventSchema(self):
        '''
        create an Event/City schema with mutual references, add one event
        and one city and link the city to the event

        The original disabled this test with a leading return statement;
        it is skipped explicitly so that the test report shows it.

        https://stackoverflow.com/a/63077495/1497139
        '''
        schema = {
            "things": {
                "type": "thing",
                "classes": [
                    {
                        "class": "Event",
                        "description": "event",
                        "properties": [
                            {
                                "name": "acronym",
                                "description": "acronym",
                                "dataType": [
                                    "text"
                                ]
                            },
                            {
                                "name": "inCity",
                                "description": "city reference",
                                "dataType": [
                                    "City"
                                ],
                                "cardinality": "many"
                            }
                        ]
                    },
                    {
                        "class": "City",
                        "description": "city",
                        "properties": [
                            {
                                "name": "name",
                                "description": "name",
                                "dataType": [
                                    "text"
                                ]
                            },
                            {
                                "name": "hasEvent",
                                "description": "event references",
                                "dataType": [
                                    "Event"
                                ],
                                "cardinality": "many"
                            }
                        ]
                    }
                ]
            }
        }
        client = self.getClient()
        if not client.contains_schema():
            client.create_schema(schema)
        event = {"acronym": "example"}
        client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
        # presumably gives Weaviate time to make the new objects available
        # before they are referenced - TODO confirm
        time.sleep(2.0)
        client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
# run all tests of this module when it is executed as a script
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
The unit test for the connection, schema and data objects you show above works like this with the Python client v1.x (see the inline comments for what's changed):
There's no support for automatically deriving a schema from a list of dicts (or other formats) yet. This could, as you mention, be a good convenience feature, so we will add this to Weaviate's feature suggestions!