diff --git a/PARADOXES.txt b/PARADOXES.txt new file mode 100644 index 00000000..fe772166 --- /dev/null +++ b/PARADOXES.txt @@ -0,0 +1,7 @@ +gia na kano genika uniques prepei anamesa se kathe koma na yparxei space gia paradeigma: + +create table testimus1 (ena str unique , dio str) + +create table testimus1 (ena str unique , dio str primary key , tria str ) + +create index indo on testimus1(ena) using btree (MONO B TREE SUPPORTED) \ No newline at end of file diff --git a/__pycache__/mdb.cpython-310.pyc b/__pycache__/mdb.cpython-310.pyc new file mode 100644 index 00000000..5ae51076 Binary files /dev/null and b/__pycache__/mdb.cpython-310.pyc differ diff --git a/changelog.pdf b/changelog.pdf new file mode 100644 index 00000000..3e826958 Binary files /dev/null and b/changelog.pdf differ diff --git a/dbdata/smdb_db/advisor.pkl b/dbdata/smdb_db/advisor.pkl new file mode 100644 index 00000000..7e60291f Binary files /dev/null and b/dbdata/smdb_db/advisor.pkl differ diff --git a/dbdata/smdb_db/boy.pkl b/dbdata/smdb_db/boy.pkl new file mode 100644 index 00000000..00a4196c Binary files /dev/null and b/dbdata/smdb_db/boy.pkl differ diff --git a/dbdata/smdb_db/boy2.pkl b/dbdata/smdb_db/boy2.pkl new file mode 100644 index 00000000..45efdaff Binary files /dev/null and b/dbdata/smdb_db/boy2.pkl differ diff --git a/dbdata/smdb_db/classroom.pkl b/dbdata/smdb_db/classroom.pkl new file mode 100644 index 00000000..63a85753 Binary files /dev/null and b/dbdata/smdb_db/classroom.pkl differ diff --git a/dbdata/smdb_db/course.pkl b/dbdata/smdb_db/course.pkl new file mode 100644 index 00000000..ace69cd1 Binary files /dev/null and b/dbdata/smdb_db/course.pkl differ diff --git a/dbdata/smdb_db/department.pkl b/dbdata/smdb_db/department.pkl new file mode 100644 index 00000000..f41676fe Binary files /dev/null and b/dbdata/smdb_db/department.pkl differ diff --git a/dbdata/smdb_db/instructor.pkl b/dbdata/smdb_db/instructor.pkl new file mode 100644 index 00000000..4c4a6cee Binary files /dev/null and b/dbdata/smdb_db/instructor.pkl differ diff --git a/dbdata/smdb_db/meta_indexes.pkl b/dbdata/smdb_db/meta_indexes.pkl new file mode 100644 index 00000000..3d6afcbe Binary files /dev/null and b/dbdata/smdb_db/meta_indexes.pkl differ diff --git a/dbdata/smdb_db/meta_insert_stack.pkl b/dbdata/smdb_db/meta_insert_stack.pkl new file mode 100644 index 00000000..3de75117 Binary files /dev/null and b/dbdata/smdb_db/meta_insert_stack.pkl differ diff --git a/dbdata/smdb_db/meta_length.pkl b/dbdata/smdb_db/meta_length.pkl new file mode 100644 index 00000000..5f6fa04a Binary files /dev/null and b/dbdata/smdb_db/meta_length.pkl differ diff --git a/dbdata/smdb_db/meta_locks.pkl b/dbdata/smdb_db/meta_locks.pkl new file mode 100644 index 00000000..e9b6fad6 Binary files /dev/null and b/dbdata/smdb_db/meta_locks.pkl differ diff --git a/dbdata/smdb_db/prereq.pkl b/dbdata/smdb_db/prereq.pkl new file mode 100644 index 00000000..234fe214 Binary files /dev/null and b/dbdata/smdb_db/prereq.pkl differ diff --git a/dbdata/smdb_db/section.pkl b/dbdata/smdb_db/section.pkl new file mode 100644 index 00000000..38b96009 Binary files /dev/null and b/dbdata/smdb_db/section.pkl differ diff --git a/dbdata/smdb_db/student.pkl b/dbdata/smdb_db/student.pkl new file mode 100644 index 00000000..73e5677a Binary files /dev/null and b/dbdata/smdb_db/student.pkl differ diff --git a/dbdata/smdb_db/takes.pkl b/dbdata/smdb_db/takes.pkl new file mode 100644 index 00000000..25e9ece4 Binary files /dev/null and b/dbdata/smdb_db/takes.pkl differ diff --git a/dbdata/smdb_db/teaches.pkl b/dbdata/smdb_db/teaches.pkl new file mode 100644 index 00000000..ff3e0d5c Binary files /dev/null and b/dbdata/smdb_db/teaches.pkl differ diff --git a/dbdata/smdb_db/tes.pkl b/dbdata/smdb_db/tes.pkl new file mode 100644 index 00000000..1d88379f Binary files /dev/null and b/dbdata/smdb_db/tes.pkl differ diff --git a/dbdata/smdb_db/tes2.pkl b/dbdata/smdb_db/tes2.pkl new file mode 100644 index 00000000..19e8fd20 Binary files /dev/null and b/dbdata/smdb_db/tes2.pkl differ diff --git a/dbdata/smdb_db/tes3.pkl b/dbdata/smdb_db/tes3.pkl new file mode 100644 index 00000000..95a233e7 Binary files /dev/null and b/dbdata/smdb_db/tes3.pkl differ diff --git a/dbdata/smdb_db/tes4.pkl b/dbdata/smdb_db/tes4.pkl new file mode 100644 index 00000000..45661a2d Binary files /dev/null and b/dbdata/smdb_db/tes4.pkl differ diff --git a/dbdata/smdb_db/tes6.pkl b/dbdata/smdb_db/tes6.pkl new file mode 100644 index 00000000..795aa188 Binary files /dev/null and b/dbdata/smdb_db/tes6.pkl differ diff --git a/dbdata/smdb_db/tes7.pkl b/dbdata/smdb_db/tes7.pkl new file mode 100644 index 00000000..4e08843a Binary files /dev/null and b/dbdata/smdb_db/tes7.pkl differ diff --git a/dbdata/smdb_db/tes9.pkl b/dbdata/smdb_db/tes9.pkl new file mode 100644 index 00000000..ce59e579 Binary files /dev/null and b/dbdata/smdb_db/tes9.pkl differ diff --git a/dbdata/smdb_db/test1.pkl b/dbdata/smdb_db/test1.pkl new file mode 100644 index 00000000..5c454afd Binary files /dev/null and b/dbdata/smdb_db/test1.pkl differ diff --git a/dbdata/smdb_db/test2.pkl b/dbdata/smdb_db/test2.pkl new file mode 100644 index 00000000..e8b06a4e Binary files /dev/null and b/dbdata/smdb_db/test2.pkl differ diff --git a/dbdata/smdb_db/test4.pkl b/dbdata/smdb_db/test4.pkl new file mode 100644 index 00000000..536e9fd3 Binary files /dev/null and b/dbdata/smdb_db/test4.pkl differ diff --git a/dbdata/smdb_db/test5.pkl b/dbdata/smdb_db/test5.pkl new file mode 100644 index 00000000..088f4be2 Binary files /dev/null and b/dbdata/smdb_db/test5.pkl differ diff --git a/dbdata/smdb_db/test8.pkl b/dbdata/smdb_db/test8.pkl new file mode 100644 index 00000000..ed4371c9 Binary files /dev/null and b/dbdata/smdb_db/test8.pkl differ diff --git a/dbdata/smdb_db/test9.pkl b/dbdata/smdb_db/test9.pkl new file mode 100644 index 00000000..7b219dae Binary files /dev/null and b/dbdata/smdb_db/test9.pkl differ diff --git a/dbdata/smdb_db/time_slot.pkl b/dbdata/smdb_db/time_slot.pkl new file mode 100644 index 00000000..bd150378 Binary files /dev/null and b/dbdata/smdb_db/time_slot.pkl differ diff --git a/dbdata/smdb_db/uniqoz1.pkl b/dbdata/smdb_db/uniqoz1.pkl new file mode 100644 index 00000000..d1fbbc36 Binary files /dev/null and b/dbdata/smdb_db/uniqoz1.pkl differ diff --git a/index_uniques.pkl b/index_uniques.pkl new file mode 100644 index 00000000..6d38a700 Binary files /dev/null and b/index_uniques.pkl differ diff --git a/mdb.py b/mdb.py index a981e5be..820ed144 100644 --- a/mdb.py +++ b/mdb.py @@ -5,10 +5,17 @@ import readline import traceback import shutil +import pickle +import pandas as panda sys.path.append('miniDB') -from database import Database -from table import Table +from miniDB.database import Database +from miniDB.table import Table + +from miniDB.database import Database +from miniDB.table import Table + +from tabulate import tabulate # art font is "big" art = ''' _ _ _____ ____ @@ -16,7 +23,7 @@ _ __ ___ _ _ __ _ | | | || |_) | | '_ ` _ \ | || '_ \ | || | | || _ < | | | | | || || | | || || |__| || |_) | - |_| |_| |_||_||_| |_||_||_____/ |____/ 2022 + |_| |_| |_||_||_| |_||_||_____/ |____/ 2023 ''' @@ -39,6 +46,9 @@ def in_paren(qsplit, ind): def create_query_plan(query, keywords, action): + #print('2.0 Create QueryPlan Start') + #print("2.1 query: ",query) + #print("2.1.1 keywords, should be identical to 1.2: ",keywords) ''' Given a query, the set of keywords that we expect to pe present and the overall action, return the query plan for this query. @@ -46,8 +56,10 @@ def create_query_plan(query, keywords, action): ''' dic = {val: None for val in keywords if val!=';'} + #print("2.2 dic: ",dic) # crafts the dictionary template with none values for each key - ql = [val for val in query.split(' ') if val !=''] + ql = [val for val in query.split(' ') if val !=''] # ql stands for querylist, its the query, but in list form + #print("2.3 ql: ",ql) kw_in_query = [] kw_positions = [] @@ -66,16 +78,22 @@ def create_query_plan(query, keywords, action): ql.pop(i+1) kw_positions.append(i) i+=1 + #print ("kw_in_query: ",kw_in_query) + #print ("kw_positions: ",kw_positions) for i in range(len(kw_in_query)-1): dic[kw_in_query[i]] = ' '.join(ql[kw_positions[i]+1:kw_positions[i+1]]) + #print ("dic[kw_in_query[i]]: ",dic[kw_in_query[i]]) + #print("dic after loop: ",dic) if action == 'create view': + #print('ACTION = CREATE VIEW') dic['as'] = interpret(dic['as']) if action=='select': + #print('ACTION = SELECT') dic = evaluate_from_clause(dic) if dic['distinct'] is not None: @@ -94,34 +112,94 @@ def create_query_plan(query, keywords, action): dic['desc'] = None if action=='create table': + #print('ACTION = CREATE TABLE') + + query_list = query.split() + table_index = query_list.index("table") + tab_name = query_list[table_index + 1] args = dic['create table'][dic['create table'].index('('):dic['create table'].index(')')+1] + dic['create table'] = dic['create table'].removesuffix(args).strip() arg_nopk = args.replace('primary key', '')[1:-1] + arg_noUnique=args.replace('unique', '')[1:-1] arglist = [val.strip().split(' ') for val in arg_nopk.split(',')] + dic['column_names'] = ','.join([val[0] for val in arglist]) dic['column_types'] = ','.join([val[1] for val in arglist]) + + keyboy=0 if 'primary key' in args: arglist = args[1:-1].split(' ') dic['primary key'] = arglist[arglist.index('primary')-2] + keyboy=arglist[arglist.index('primary')-2] + else: dic['primary key'] = None + + + if 'unique' in arg_nopk: + ''' + Here we check if there are unique arguments in arg nopk and if there is, + we create pkl fine to add them in , so that we can later use them and change them + in case of deletion or to check if there is indeed a unique column when trying to index a + unique column using B+Tree indexing + ''' + split_arg = arg_nopk.split() + + row1= split_arg[split_arg.index('unique') - 2] + + table1=tab_name + data = {"tab_name": table1,"primary_key":keyboy, "unique_column": row1} + #data=row1,'',keyboy + + ''' + Here we start creating the file and checking each case , + if there is a file etc + ''' + if 'unique_table' in locals(): + + dataFR=unique_table + + + + elif os.path.isfile('./unique_table.pkl'): + dataFR=panda.read_pickle('./unique_table.pkl') + + else: + dataFR = panda.DataFrame(columns=['tab_name', 'primary_key', 'unique_column']) + + + + dataFR=dataFR.append({'tab_name': table1, 'primary_key': keyboy, 'unique_column': row1},ignore_index=True) + + unique_table=dataFR + dataFR.to_pickle('./unique_table.pkl') if action=='import': + #print('ACTION = IMPORT') dic = {'import table' if key=='import' else key: val for key, val in dic.items()} if action=='insert into': + #print('ACTION = INSERT INTO') if dic['values'][0] == '(' and dic['values'][-1] == ')': dic['values'] = dic['values'][1:-1] else: - raise ValueError('Your parens are not right m8') + raise ValueError('Your parens are not right m8') #wut? if action=='unlock table': + #print('ACTION= UNLOCK TABLE') if dic['force'] is not None: dic['force'] = True else: dic['force'] = False return dic +def create_and_write_to_file(file_name, content): + with open(file_name, "w") as file: + file.write(content) + print(f"File '{file_name}' created and written to successfully!") + +#create_and_write_to_file("new_file.txt", "This is the content written to the file.") @@ -161,9 +239,47 @@ def evaluate_from_clause(dic): return dic def interpret(query): + #print('1. Interpret Start') ''' Interpret the query. ''' + #INTERVENTION FOR B+TREE INDEX ON A UNIQUE COLUMN + ''' + The way we are checking and interpreting the query of : + create index INDEX_NAME on TABLE_THAT_HAS_A_UNIQUE_COLUMN(UNIQUE_COLUMN_NAME) using btree --->(ONLY B+TREE SUPPORTED) + is to check if the inserted query is equal to similar1 and then we make an overwrite on the index unique file + the new overwriten data will be used inside the Database.py file to check before creating an index + if the new index is create on a unique column or on its PK + ''' + + similar1 = r"create index (\w+) on (\w+)\((\w+)\) using btree" + similar_to= re.search(similar1,query) + ok='ok' + if similar_to: + + index_name=similar_to.group(1) + table_name=similar_to.group(2) + table_column=similar_to.group(3) + print(index_name," ",table_name," ",table_column) + if os.path.isfile('./index_uniques.pkl'): + dataFR5=panda.read_pickle('./index_uniques.pkl') + dataFR5.loc[0]=table_name,table_column,index_name + dataFR5.to_pickle('./index_uniques.pkl') + + dataFR5=panda.read_pickle('index_uniques.pkl') + + else: + dataFR5=panda.DataFrame(columns=['table_name','table_column','index_name']) + dataFR5.loc[0]=table_name,table_column,index_name + dataFR5.to_pickle('./index_uniques.pkl') + dataFR5=panda.read_pickle('index_uniques.pkl') + + + + + + + #keyword per action? kw_per_action = {'create table': ['create table'], 'drop table': ['drop table'], 'cast': ['cast', 'from', 'to'], @@ -171,6 +287,7 @@ def interpret(query): 'export': ['export', 'to'], 'insert into': ['insert into', 'values'], 'select': ['select', 'from', 'where', 'distinct', 'order by', 'limit'], + #'select': ['select', 'from', 'where', 'distinct', 'between', 'and', 'order by', 'limit'], 'lock table': ['lock table', 'mode'], 'unlock table': ['unlock table', 'force'], 'delete from': ['delete from', 'where'], @@ -178,18 +295,67 @@ def interpret(query): 'create index': ['create index', 'on', 'using'], 'drop index': ['drop index'], 'create view' : ['create view', 'as'] - } + } # ta keys einai ta aristera, to eidos. - if query[-1]!=';': + if query[-1]!=';': # add ; if it doesnt exist (at the end) query+=';' - query = query.replace("(", " ( ").replace(")", " ) ").replace(";", " ;").strip() + query = query.replace("(", " ( ").replace(")", " ) ").replace(";", " ;").strip() # vazei kena gia kapio logo deksia-aristera apo tis parenthesis ( kai ), to idio kai gia to ;, vazei ena keno prin, stin periptosi poy kolitike example; ==> example ; + + # for kw in kw_per_action.keys(): #gia kathe key sta keys psakse + # if query.startswith(kw): #finds what type of action we have to deal with by checking the first string that exists inside the query + # action = kw # example: query string starts with: "select ..." --> action = "select" + # print ("1.1 action type: ",action) + + # #print("1.2 kw_per_action[action]+[';'] == ",kw_per_action[action]+[';']) + # return create_query_plan(query, kw_per_action[action]+[';'], action) + multipleQueries=False + word_query=query.split() #splits query into words + for keyword in word_query: # detects if query has and's/or's + if(keyword=="or" or keyword=="and"): + multipleQueries=True + logical_operator=keyword + + if(multipleQueries): #if the query contains and's/or's, splits the query into smaller individual ones, then passes them for query planning + base_query=[] + for word in word_query: # forming the base query (query start until "where") + base_query.append(word) + if(word=='where'): + stopIndex = word_query.index("where") # will need the index later + break # base query has now been formed + tails=[] # each logical operator equals one more independent query to construct + query_tail=[] # reformed queries = query_base + query tail + for i in range(stopIndex+1,len(word_query)): # continue right after "where" + if(word_query[i]=='or' or word_query[i]=='and'): + query_tail.append(";") # end of query + tails.append(query_tail) + query_tail=[] #if or/and is found save appended tail to list, clear tail and continue with the next one (if any) + else: + query_tail.append(word_query[i]) + tails.append(query_tail) #last tail + #print (*tails) + formed_queries=[] + for query_tail in tails: # connect base_query + query_tails to form final independent queries + formed_queries.append(base_query+query_tail) + #print ("reformed QUERY",*formed_queries) + final_query = [] + for ref_query in formed_queries: # finalizing query form for query planning + ref_query = ' '.join(ref_query) + final_query.append(ref_query) + query_plans=[] + for query in final_query: # query plans + for kw in kw_per_action.keys(): + if query.startswith(kw): + action = kw + query_plans.append(create_query_plan(query, kw_per_action[action]+[';'], action)) + return query_plans,logical_operator # each query plan goes hand to hand with a logical operator indication in order to be handled accordingly + else: + for kw in kw_per_action.keys(): + if query.startswith(kw): + action = kw + return create_query_plan(query, kw_per_action[action]+[';'], action),'none' # regular query = tag for logical operator will be 'none' - for kw in kw_per_action.keys(): - if query.startswith(kw): - action = kw - return create_query_plan(query, kw_per_action[action]+[';'], action) def execute_dic(dic): ''' @@ -282,17 +448,72 @@ def remove_db(db_name): print('\nbye!') break try: - if line=='exit': + #print('0. This is program Start') + if line=='exit': # exit program break - if line.split(' ')[0].removesuffix(';') in ['lsdb', 'lstb', 'cdb', 'rmdb']: + if line.split(' ')[0].removesuffix(';') in ['lsdb', 'lstb', 'cdb', 'rmdb']: #only for meta commands interpret_meta(line) elif line.startswith('explain'): dic = interpret(line.removeprefix('explain ')) pprint(dic, sort_dicts=False) - else: - dic = interpret(line) - result = execute_dic(dic) - if isinstance(result,Table): - result.show() - except Exception: + else: # normal operation goes here + # print("--- INTERPRET START ----") + # dic = interpret(line) + # print("--- EXECUTE START --- ") + # result = execute_dic(dic) + # #print("--- TRY SHOW RESULT---") + # showResult = False + # if isinstance(result,Table): + # showResult = True + # print("--- SHOW RESULT SUCCESS ---") + # result.show() + # if (showResult==False): + # print("---SHOW RESULT FAIL--- (isinstance(result,Table)==False)") + results=[] + result_table=[] + temp_table = [] + + allQueryResults = [] + dic,logical_operator = interpret(line) + #print("SPECIAL",logical_operator,"DIC",dic) + if(logical_operator=='none'): #no change + result = execute_dic(dic) + if isinstance(result,Table): + result.show() + elif(logical_operator=='and'): # and operations + for query in dic: + results.append(execute_dic(query)) + for r in results: + header,qr=r.show(print_output=False) + #print("QueryResult:",qr) + allQueryResults.append(qr) + for qr in allQueryResults: + for row in qr: + if (row in temp_table): + result_table.append(row) + #print(row,"(AND) DUPE - OK") # debug + else: + temp_table.append(row) + #print(row,"(AND) Denied") # debug + print(tabulate(result_table[:None], headers=header)+'\n') + elif(logical_operator=='or'): # or operations + results=[] + result_table=[] + allQueryResults = [] + for query in dic: + results.append(execute_dic(query)) + for r in results: + header,qr=r.show(print_output=False) + #print("QueryResult:",qr) + allQueryResults.append(qr) + for qr in allQueryResults: + for row in qr: + if (row not in result_table): + result_table.append(row) + #print(row,"(OR) OK") # debug + else: + #print(row,"(OR) DUPLICATE (Denied)") # debug + pass + print(tabulate(result_table[:None], headers=header)+'\n') + except Exception: # errors print(traceback.format_exc()) diff --git a/meta_index_type.pkl b/meta_index_type.pkl new file mode 100644 index 00000000..fd35648a Binary files /dev/null and b/meta_index_type.pkl differ diff --git a/miniDB/__pycache__/__init__.cpython-310.pyc b/miniDB/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 00000000..a8d73f7d Binary files /dev/null and b/miniDB/__pycache__/__init__.cpython-310.pyc differ diff --git a/miniDB/__pycache__/btree.cpython-310.pyc b/miniDB/__pycache__/btree.cpython-310.pyc new file mode 100644 index 00000000..d70c7407 Binary files /dev/null and b/miniDB/__pycache__/btree.cpython-310.pyc differ diff --git a/miniDB/__pycache__/database.cpython-310.pyc b/miniDB/__pycache__/database.cpython-310.pyc new file mode 100644 index 00000000..d3fafb0e Binary files /dev/null and b/miniDB/__pycache__/database.cpython-310.pyc differ diff --git a/miniDB/__pycache__/joins.cpython-310.pyc b/miniDB/__pycache__/joins.cpython-310.pyc new file mode 100644 index 00000000..1a4fe85c Binary files /dev/null and b/miniDB/__pycache__/joins.cpython-310.pyc differ diff --git a/miniDB/__pycache__/misc.cpython-310.pyc b/miniDB/__pycache__/misc.cpython-310.pyc new file mode 100644 index 00000000..55e3f5da Binary files /dev/null and b/miniDB/__pycache__/misc.cpython-310.pyc differ diff --git a/miniDB/__pycache__/table.cpython-310.pyc b/miniDB/__pycache__/table.cpython-310.pyc new file mode 100644 index 00000000..f266e2b0 Binary files /dev/null and b/miniDB/__pycache__/table.cpython-310.pyc differ diff --git a/miniDB/btree.py b/miniDB/btree.py index f0676209..b0d0152b 100644 --- a/miniDB/btree.py +++ b/miniDB/btree.py @@ -7,6 +7,8 @@ class Node: Node abstraction. Represents a single bucket ''' def __init__(self, b, values=None, ptrs=None,left_sibling=None, right_sibling=None, parent=None, is_leaf=False): + + self.b = b # branching factor self.values = [] if values is None else values # Values (the data from the pk column) self.ptrs = [] if ptrs is None else ptrs # ptrs (the indexes of each datapoint or the index of another bucket) diff --git a/miniDB/database.py b/miniDB/database.py index a3ac6be7..008bed75 100644 --- a/miniDB/database.py +++ b/miniDB/database.py @@ -5,8 +5,9 @@ import logging import warnings import readline +import pandas as panda from tabulate import tabulate - +import re sys.path.append(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}/miniDB') from miniDB import table sys.modules['table'] = table @@ -75,7 +76,6 @@ def _save_locks(self): def load_database(self): ''' Load all tables that are part of the database (indices noted here are loaded). - Args: path: string. Directory (path) of the database on the system. ''' @@ -104,7 +104,6 @@ def _update(self): def create_table(self, name, column_names, column_types, primary_key=None, load=None): ''' This method create a new table. This table is saved and can be accessed via db_object.tables['table_name'] or db_object.table_name - Args: name: string. Name of table. column_names: list. Names of columns. @@ -113,21 +112,59 @@ def create_table(self, name, column_names, column_types, primary_key=None, load= load: boolean. Defines table object parameters as the name of the table and the column names. ''' # print('here -> ', column_names.split(',')) + ''' + if 'meta_index_type' not in self.tables: + self.tables.update({name: Table(name=name, column_names=column_names, column_types=column_types, primary_key=primary_key, load=load)}) + ''' self.tables.update({name: Table(name=name, column_names=column_names.split(','), column_types=column_types.split(','), primary_key=primary_key, load=load)}) - # self._name = Table(name=name, column_names=column_names, column_types=column_types, load=load) - # check that new dynamic var doesnt exist already - # self.no_of_tables += 1 + self._update() self.save_database() # (self.tables[name]) + if self.verbose: print(f'Created table "{name}".') - def drop_table(self, table_name): ''' Drop table from current database. + Args: + table_name: string. Name of table. + ''' + self.load_database() + self.lock_table(table_name) + self.tables.pop(table_name) + if os.path.isfile(f'{self.savedir}/{table_name}.pkl'): + os.remove(f'{self.savedir}/{table_name}.pkl') + else: + warnings.warn(f'"{self.savedir}/{table_name}.pkl" not found.') + self.delete_from('meta_locks', f'table_name={table_name}') + self.delete_from('meta_length', f'table_name={table_name}') + self.delete_from('meta_insert_stack', f'table_name={table_name}') + + if self._has_index(table_name): + to_be_deleted = [] + for key, table in enumerate(self.tables['meta_indexes'].column_by_name('table_name')): + + if table == table_name: + to_be_deleted.append(key) + + for i in reversed(to_be_deleted): + self.drop_index(self.tables['meta_indexes'].data[i][1]) + + try: + delattr(self, table_name) + except AttributeError: + pass + # self._update() + + self.save_database() + + + def drop_table(self, table_name): + ''' + Drop table from current database. Args: table_name: string. Name of table. ''' @@ -143,6 +180,49 @@ def drop_table(self, table_name): self.delete_from('meta_length', f'table_name={table_name}') self.delete_from('meta_insert_stack', f'table_name={table_name}') + #INTERVENTION TO CHECK IF TABLE TO BE DELETED HAS INSIDE IT A UNIQUE COLUMN + if os.path.isfile('./unique_table.pkl'): + ''' + Here we check inside the unique_table.pkl and if there is a row with a table name that + is equal to the table name of the table that is to be deleted , we delete the row + ''' + dataFR=panda.read_pickle('./unique_table.pkl') + #insert code that will delete the row where table_name = key in column named tab_name + dataFR = dataFR[dataFR.tab_name != table_name] + dataFR.to_pickle('./unique_table.pkl') + + else: + print('') + + #INTERVENTION TO CHECK IF TABLE TO BE DELETED IS INSIDE THE INDEX TYPE PKL FILE + if os.path.isfile('./meta_index_type.pkl'): + ''' + Here we check inside the meta_index_type.pkl and if there is a row with a table name that + is equal to the table name of the table that is to be deleted , we delete the row + ''' + dataFR1=panda.read_pickle('./meta_index_type.pkl') + dataFR1 = dataFR1[dataFR1.table_name != table_name] + dataFR1.to_pickle('./meta_index_type.pkl') + + + else: + print('') + + + #INTERVENTION TO CHECK IF TABLE TO BE DELETED HAS INDEX ON A UNIQUE COLUMN THEN WE DELETE DATA ROW OF PKL TABLE + if os.path.isfile('./index_uniques.pkl'): + ''' + Here we check inside the index_uniques.pkl and if there is a row with a table name that + is equal to the table name of the table that is to be deleted , we delete the row + ''' + dataFR2=panda.read_pickle('./index_uniques.pkl') + dataFR2 = dataFR2[dataFR2.table_name != table_name] + dataFR2.to_pickle('./index_uniques.pkl') + + else: + print('') + + if self._has_index(table_name): to_be_deleted = [] for key, table in enumerate(self.tables['meta_indexes'].column_by_name('table_name')): @@ -163,7 +243,6 @@ def drop_table(self, table_name): def import_table(self, table_name, filename, column_types=None, primary_key=None): ''' Creates table from CSV file. - Args: filename: string. CSV filename. If not specified, filename's name will be used. column_types: list. Types of columns. If not specified, all will be set to type str. @@ -192,7 +271,6 @@ def import_table(self, table_name, filename, column_types=None, primary_key=None def export(self, table_name, filename=None): ''' Transform table to CSV. - Args: table_name: string. Name of table. filename: string. Output CSV filename. @@ -210,7 +288,6 @@ def export(self, table_name, filename=None): def table_from_object(self, new_table): ''' Add table object to database. - Args: new_table: string. Name of new table. ''' @@ -239,7 +316,6 @@ def cast(self, column_name, table_name, cast_type): ''' Modify the type of the specified column and cast all prexisting values. (Executes type() for every value in column and saves) - Args: table_name: string. Name of table (must be part of database). column_name: string. The column that will be casted (must be part of database). @@ -257,20 +333,27 @@ def cast(self, column_name, table_name, cast_type): def insert_into(self, table_name, row_str): ''' Inserts data to given table. - Args: table_name: string. Name of table (must be part of database). row: list. A list of values to be inserted (will be casted to a predifined type automatically). lock_load_save: boolean. If False, user needs to load, lock and save the states of the database (CAUTION). Useful for bulk-loading. ''' + row = row_str.strip().split(',') + + #INTERVENTION HERE WE CHECK IF TABLE HAS UNIQUE ROW AND IF YES THIS FUNCTION CHEKCS FOR SIMILAR DATA AND IF YES IT RAISES EXCEPTIONS + self._check_unique(row,table_name) + self.load_database() # fetch the insert_stack. For more info on the insert_stack # check the insert_stack meta table lock_ownership = self.lock_table(table_name, mode='x') insert_stack = self._get_insert_stack_for_table(table_name) + try: + #self._check_unique(row,table_name) self.tables[table_name]._insert(row, insert_stack) + except Exception as e: logging.info(e) logging.info('ABORTED') @@ -285,7 +368,6 @@ def insert_into(self, table_name, row_str): def update_table(self, table_name, set_args, condition): ''' Update the value of a column where a condition is met. - Args: table_name: string. Name of table (must be part of database). set_value: string. New value of the predifined column name. @@ -309,7 +391,6 @@ def update_table(self, table_name, set_args, condition): def delete_from(self, table_name, condition): ''' Delete rows of table where condition is met. - Args: table_name: string. Name of table (must be part of database). condition: string. A condition using the following format: @@ -330,12 +411,29 @@ def delete_from(self, table_name, condition): if table_name[:4]!='meta': self._add_to_insert_stack(table_name, deleted) self.save_database() - + + def check_metaboy(self,table_name): + ''' + Function used to check for data inside pkl table quicker and returns boolean value + ''' + if os.path.isfile('./unique_table.pkl'): + + dataFr1=panda.read_pickle('./unique_table.pkl') + + searcher=(dataFr1['tab_name']==table_name) + res2=dataFr1[searcher] + if res2.empty: + print('') + return(res2.empty) + + else: + print(res2.empty) + return(res2.empty) + def select(self, columns, table_name, condition, distinct=None, order_by=None, \ limit=True, desc=None, save_as=None, return_object=True): ''' Selects and outputs a table's data where condtion is met. - Args: table_name: string. Name of table (must be part of database). columns: list. The columns that will be part of the output table (use '*' to select all available columns) @@ -351,27 +449,146 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ return_object: boolean. If True, the result will be a table object (useful for internal use - the result will be printed by default). distinct: boolean. If True, the resulting table will contain only unique rows. ''' - - # print(table_name) + self.load_database() if isinstance(table_name,Table): + return table_name._select_where(columns, condition, distinct, order_by, desc, limit) + if condition is not None: condition_column = split_condition(condition)[0] else: condition_column = '' - + + #INTERVENTION TO GET INDEX NAME IF TABLE WE ARE SELECTINF FROM , HAS ONE + index_name = self.get_index_name(table_name) + + + + #INTERVENTION HERE WE CHECK TO GET DATA FROM THE PKL TABLE CONTAINING INDEX TYPES + if os.path.isfile('./meta_index_type.pkl'): + ''' + Checking for an index type if there is one coresponding to the table name we are checking now , + we append its value to the variable index_type + ''' + dataFR=panda.read_pickle('./meta_index_type.pkl') + searcher=(dataFR['table_name']==table_name) + res=dataFR[searcher] + if res.empty: + print('') + + else: + print(" ") + unique_boy1=res.iloc[0]['index_type'] + + index_type=unique_boy1 + + + + # self.lock_table(table_name, mode='x') if self.is_locked(table_name): + print('') return - if self._has_index(table_name) and condition_column==self.tables[table_name].column_names[self.tables[table_name].pk_idx]: - index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] - bt = self._load_idx(index_name) - table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) + + + table_chekpoint=table_name + #INTERVENTION TO CHECK FOR FOR A UNIQUE COLUMN WITH INDEX ON IT ON THE UNIQUE TABLES + checker=self.check_metaboy(table_chekpoint) + if os.path.isfile('./unique_table.pkl') and checker==False: + + dataFr1=panda.read_pickle('./unique_table.pkl') + + searcher=(dataFr1['tab_name']==table_chekpoint) + res22=dataFr1[searcher] + if res22.empty: + print('') + + else: + print("") + + + + + if os.path.isfile('./unique_table.pkl') and res22.empty==False: + + dataFr=panda.read_pickle('./unique_table.pkl') + print(table_chekpoint) + searcher=(dataFr['tab_name']==table_chekpoint) + res1=dataFr[searcher] + if res1.empty: + print('') + + else: + print("") + + + + + + if table_chekpoint in self._has_index(table_chekpoint) and res1.empty==False: + ''' + Here we have reched the point of a search on a column that is unique and has b+tree index , then we + continue to execute the query with its arguments + ''' + if index_type == 'btree': + print("Select using b+tree with a unique column") + + #index_name = self.select('*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] + if condition_column=='': + + table = self.tables[table_name]._select_where( + columns, condition, distinct, order_by, desc, limit) + else: + bt = self._load_idx(index_name) + print(bt) + table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) + else: + table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + # self.unlock_table(table_name) + if save_as is not None: + table._name = save_as + self.table_from_object(table) + else: + if return_object: + return table + else: + return table.show() + + elif table_name in self._has_index(table_name) and condition_column == self.tables[table_name].column_names[self.tables[table_name].pk_idx]: + + + if index_type == 'hash': + + ''' + Here we have reached the point where we are searcing on the pk of table that has hash index on its pk column + ''' + + indexer=self.tables['meta_indexes'].column_by_name('table_name').index(table_name) + index_name = self.tables['meta_indexes'].column_by_name('index_name')[indexer] + + + print('SELECT WHERE HASHINDEX ') + bt = self._load_idx(index_name) + + + table = self.tables[table_name]._select_where_with_hashindex(columns, bt, condition, distinct, order_by, desc, limit) + elif index_type == 'btree': + ''' + Here we have reached the point where we are searcing on the pk of table that has b+tree index on its pk column + ''' + index_name = self.select( + '*', 'meta_indexes', f'table_name={table_name}', return_object=True).column_by_name('index_name')[0] + + bt = self._load_idx(index_name) + + table = self.tables[table_name]._select_where_with_btree(columns, bt, condition, distinct, order_by, desc, limit) + #elif self._has_index(table_name) and condition_column == self.tables[table_name].column_names[self.tables[table_name].pk_idx] else: - table = self.tables[table_name]._select_where(columns, condition, distinct, order_by, desc, limit) + table = self.tables[table_name]._select_where( + columns, condition, distinct, order_by, desc, limit) # self.unlock_table(table_name) if save_as is not None: table._name = save_as @@ -386,7 +603,6 @@ def select(self, columns, table_name, condition, distinct=None, order_by=None, \ def show_table(self, table_name, no_of_rows=None): ''' Print table in a readable tabular design (using tabulate). - Args: table_name: string. Name of table (must be part of database). ''' @@ -398,7 +614,6 @@ def show_table(self, table_name, no_of_rows=None): def sort(self, table_name, column_name, asc=False): ''' Sorts a table based on a column. - Args: table_name: string. Name of table (must be part of database). column_name: string. the column name that will be used to sort. @@ -417,7 +632,6 @@ def sort(self, table_name, column_name, asc=False): def create_view(self, table_name, table): ''' Create a virtual table based on the result-set of the SQL statement provided. - Args: table_name: string. Name of the table that will be saved. table: table. The table that will be saved. @@ -428,7 +642,6 @@ def create_view(self, table_name, table): def join(self, mode, left_table, right_table, condition, save_as=None, return_object=True): ''' Join two tables that are part of the database where condition is met. - Args: left_table: string. Name of the left table (must be in DB) or Table obj. right_table: string. Name of the right table (must be in DB) or Table obj. @@ -496,9 +709,9 @@ def join(self, mode, left_table, right_table, condition, save_as=None, return_ob res.show() def lock_table(self, table_name, mode='x'): + ''' Locks the specified table using the exclusive lock (X). - Args: table_name: string. Table name (must be part of database). ''' @@ -529,7 +742,6 @@ def lock_table(self, table_name, mode='x'): def unlock_table(self, table_name, force=False): ''' Unlocks the specified table that is exclusively locked (X). - Args: table_name: string. Table name (must be part of database). ''' @@ -551,7 +763,6 @@ def unlock_table(self, table_name, force=False): def is_locked(self, table_name): ''' Check whether the specified table is exclusively locked (X). - Args: table_name: string. Table name (must be part of database). ''' @@ -616,10 +827,61 @@ def _update_meta_insert_stack(self): self.tables['meta_insert_stack']._insert([table._name, []]) + + + def _check_unique(self, row, table_name): + + ''' + A function to check if there are any unique constraints in a table and ensure that the new data being inserted does not violate any of these constraints. + this happens by checking the temporary data that has been inserted in the table , we extract the needed data , table name etc from the unique table.pkl + Args: + row=row in def_insert into + table_name= name of table data is to be inserted into + + ''' + + #current table columns + columns_all_now = self.tables[table_name].column_names + + if os.path.isfile('./unique_table.pkl'): + + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==table_name) + res=dataFr[searcher] + if res.empty: + print('') + + else: + print("Unique column found") + unique_boy1=res.iloc[0]['unique_column'] + + + #the unique ones + + + #the data + data_now = self.tables[table_name].data + + for i in range (len(columns_all_now)): + if columns_all_now[i]==unique_boy1: + for f in range(len(data_now)): + for h in range(len(data_now[f])): + if row[i]==data_now[f][h]: + raise Exception("error value exsists in unique column no insertion will be done ") + + + + + + + + + + + def _add_to_insert_stack(self, table_name, indexes): ''' Adds provided indices to the insert stack of the specified table. - Args: table_name: string. Table name (must be part of database). indexes: list. The list of indices that will be added to the insert stack (the indices of the newly deleted elements). @@ -630,7 +892,6 @@ def _add_to_insert_stack(self, table_name, indexes): def _get_insert_stack_for_table(self, table_name): ''' Returns the insert stack of the specified table. - Args: table_name: string. Table name (must be part of database). ''' @@ -641,7 +902,6 @@ def _get_insert_stack_for_table(self, table_name): def _update_meta_insert_stack_for_tb(self, table_name, new_stack): ''' Replaces the insert stack of a table with the one supplied by the user. - Args: table_name: string. Table name (must be part of database). new_stack: string. The stack that will be used to replace the existing one. @@ -650,92 +910,389 @@ def _update_meta_insert_stack_for_tb(self, table_name, new_stack): # indexes - def create_index(self, index_name, table_name, index_type='btree'): + def create_index(self, index_name, table_name, index_type): + + ''' Creates an index on a specified table with a given name. Important: An index can only be created on a primary key (the user does not specify the column). - Args: table_name: string. Table name (must be part of database). index_name: string. Name of the created index. ''' - if self.tables[table_name].pk_idx is None: # if no primary key, no index - raise Exception('Cannot create index. Table has no primary key.') + + + ''' + if 'meta_index_type' not in self.tables: + self.create_table('meta_index_type', [ + 'table_name', 'index_name', 'index_type'], [str, str, str]) + ''' + + + + + + + + + #INTERVENTION , HERE WE BEGIN CHECKING IF THE INDEX WAS CREATED ON A UNIQUE COLUMN USING B TREE USING THE DATA OF THE INDEX UNIQUES THE IS CREATED WHEN SOMEONE + #CREATE INDEX INDEX_NAME ON TABLE_NAME(UNIQUE COLUMN NAME) USING BTREE + if os.path.isfile('./index_uniques.pkl'): + ''' + Here begins the creation or usage of a pkl file that has inside of it table names the unique table column and the name of the index + ''' + dataFR6=panda.read_pickle('./index_uniques.pkl') + tab = dataFR6["table_name"].to_string(index=False) + un_col = dataFR6["table_column"].to_string(index=False) + index_n = dataFR6["index_name"].to_string(index=False) + if os.path.isfile('./unique_table.pkl'): + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==table_name.split(" ")[0]) + res=dataFr[searcher] + + if res.empty: + print('') + + else: + unique_boy=res.iloc[0]['unique_column'] + if tab==table_name.split(" ")[0] and un_col==unique_boy and index_name==index_n: + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): + if index_type == 'btree': + print('') + logging.info('Creating Btree index.') + + # insert a record with the name of the index and the table on which it's created to the meta_indexes table + self.tables['meta_indexes']._insert([table_name.split(" ")[0], index_name]) + + if os.path.isfile('./meta_index_type.pkl'): + dataFR=panda.read_pickle('./meta_index_type.pkl') + + else: + dataFR = panda.DataFrame(columns=['table_name', 'index_type','index_name']) + + + dataFR=dataFR.append({'table_name': tab, 'index_type': index_type.lower(),'index_name':index_name},ignore_index=True) + dataFR.to_pickle('./meta_index_type.pkl') + + + + + + + + # crate the actual index + true = 1 + self._construct_index_for_uniques_btree(self, table_name.split(" ")[0], index_name, index_type) + + self.save_database() + return + else: + print('') + + + + + + #INTERVENTION TO CHECK IF TABLE HAS UNIQUE COLUMN AT LEAST + if os.path.isfile('./unique_table.pkl'): + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==table_name) + res=dataFr[searcher] + + if res.empty: + print('') + else: + unique_boy=res.iloc[0]['unique_column'] + + + + if self.tables[table_name].pk_idx is None : # if no primary key, + raise Exception('Cannot create index. Table has no primary key or unique values.') + + + + + + #if self.tables[table_name].pk_idx is None and res.empty==False: + if index_name not in self.tables['meta_indexes'].column_by_name('index_name'): - # currently only btree is supported. This can be changed by adding another if. - if index_type=='btree': + if index_type == 'btree': logging.info('Creating Btree index.') + # insert a record with the name of the index and the table on which it's created to the meta_indexes table self.tables['meta_indexes']._insert([table_name, index_name]) + #self.tables['meta_index_type']._insert([table_name, index_name, index_type.lower()]) + if os.path.isfile('./meta_index_type.pkl'): + dataFR=panda.read_pickle('./meta_index_type.pkl') + + else: + dataFR = panda.DataFrame(columns=['table_name', 'index_type', 'index_name']) + + + dataFR=dataFR.append({'table_name': table_name, 'index_type': index_type.lower(),'index_name':index_name},ignore_index=True) + dataFR.to_pickle('./meta_index_type.pkl') + + + # crate the actual index - self._construct_index(table_name, index_name) + + true = 1 + self._construct_index(self, table_name, index_name, index_type) + + + print('index made') + self.save_database() + + + if index_type == 'hash': + + + + + type = index_type + + logging.info('Creating hash index.') + # insert a record with the name of the index and the table on which it's created to the meta_indexes table + self.tables['meta_indexes']._insert([table_name, index_name]) + + + + true = 1 + # crate the actual index + + self._construct_index(table_name, type, self, index_name) + if os.path.isfile('./meta_index_type.pkl'): + dataFR=panda.read_pickle('./meta_index_type.pkl') + + else: + dataFR = panda.DataFrame(columns=['table_name', 'index_type', 'index_name']) + + + dataFR=dataFR.append({'table_name': table_name, 'index_type': index_type.lower(),'index_name':index_name},ignore_index=True) + dataFR.to_pickle('./meta_index_type.pkl') + + self.save_database() + print('Index made') + + return + else: - raise Exception('Cannot create index. Another index with the same name already exists.') + raise Exception( + 'Cannot create index. Another index with the same name already exists.') + - def _construct_index(self, table_name, index_name): + def _construct_index_for_uniques_btree(table_name, index_type, self, index_name, lsb=True): + + ''' + This function creates an index on unique column using btree , its diferent as the construct index in a way that it chekcks + data inside the unique_table.pkl ffile in order to extract the column to be indexed etc. + Args: + table_name: string. Table name (must be part of database). + index_name: string. Name of the created index. ''' - Construct a btree on a table and save. + + index_type1 = lsb + index_name1 = lsb + table_name1 = self + self1 = index_type + + self = self1 + index_type = index_type1 + table_name = table_name1 + #index_name = index_name1 + + + + if index_type == 'btree': + bt = Btree(3) # 3 is arbitrary + + + + if os.path.isfile('./unique_table.pkl'): + + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==table_name) + res=dataFr[searcher] + if res.empty: + print('') + + else: + + unique_boy1=res.iloc[0]['unique_column'] + + + + + + + # for each record in the primary key of the table, insert its value and index to the btree + for idx, key in enumerate(self.tables[table_name].column_by_name(unique_boy1)): + if key is None: + continue + bt.insert(key, idx) + print('index made on unique column') + # save the btree + self._save_index(index_name, bt) + + + + + + + + + def _construct_index(table_name, index_type, self, index_name, lsb=True): + ''' + Construct a hash or btree on a table and save depending on index type. Args: table_name: string. Table name (must be part of database). index_name: string. Name of the created index. + index_type: string. Type of index ''' - bt = Btree(3) # 3 is arbitrary - # for each record in the primary key of the table, insert its value and index to the btree - for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): - if key is None: - continue - bt.insert(key, idx) - # save the btree - self._save_index(index_name, bt) + #some confusions on the variables so i had to rearange + index_type1 = lsb + index_name1 = lsb + table_name1 = self + self1 = index_type + + self = self1 + index_type = index_type1 + table_name = table_name1 + #index_name = index_name1 + + + backup_table_name=self + backup_index_name=index_type + backup_index_type=table_name + backup_self=index_name + #print(backup_self.tables[backup_table_name].column_by_name(backup_self.tables[backup_table_name].pk)) + if backup_index_type == 'hash': + + # create the index object + index = HashIndex(depth=1, lsb=True) + # insert records into the index + for idx, key in enumerate(backup_self.tables[backup_table_name].column_by_name(backup_self.tables[backup_table_name].pk)): + if key is None: + continue + index.insert(key, idx) + # use the search method to search for a specific key + result = index.find(key) + # use the delete method to delete a specific key + index.delete(key) + # use the get_all method to get all the key-value pairs in the index + pairs = index.get_all() + + backup_self._save_index(backup_index_name, index) + print('Index hash made') + + + elif index_type == 'btree': + bt = Btree(3) # 3 is arbitrary + + + # for each record in the primary key of the table, insert its value and index to the btree + for idx, key in enumerate(self.tables[table_name].column_by_name(self.tables[table_name].pk)): + if key is None: + continue + bt.insert(key, idx) + # save the btree + + self._save_index(index_name, bt) + print('b+tree index made') + def _has_index(self, table_name): ''' Check whether the specified table's primary key column is indexed. - Args: table_name: string. Table name (must be part of database). ''' - return table_name in self.tables['meta_indexes'].column_by_name('table_name') + + return self.tables['meta_indexes'].column_by_name('table_name') def _save_index(self, index_name, index): ''' Save the index object. - - Args: + A rgs: index_name: string. Name of the created index. - index: obj. The actual index object (btree object). + index: obj. The actual index object (btree object or HashIndex object). ''' + + try: os.mkdir(f'{self.savedir}/indexes') except: pass - with open(f'{self.savedir}/indexes/meta_{index_name}_index.pkl', 'wb') as f: - pickle.dump(index, f) + # check if the index is a HashIndex object + if isinstance(index, HashIndex): + # save the index as a pickle file + with open(f'{self.savedir}/indexes/meta_{index_name}_index.pkl', 'wb') as f: + pickle.dump(index, f) + elif isinstance(index, Btree): + # save the btree index as a pickle file + with open(f'{self.savedir}/indexes/meta_{index_name}_index.pkl', 'wb') as f: + pickle.dump(index, f) + else: + print('save error ') + + def get_index_name(self, table_name): + meta_indexes = self.tables['meta_indexes'] + table_names = meta_indexes.column_by_name('table_name') + index_names = meta_indexes.column_by_name('index_name') + + index_name = None + if table_name in table_names: + index_name = index_names[table_names.index(table_name)] + + return index_name + + def _load_idx(self, index_name): ''' Load and return the specified index. - Args: index_name: string. Name of created index. ''' + f = open(f'{self.savedir}/indexes/meta_{index_name}_index.pkl', 'rb') index = pickle.load(f) + f.close() return index def drop_index(self, index_name): ''' Drop index from current database. - Args: index_name: string. Name of index. ''' + + #all the changes needed to delete all the index data from the pkl tabls + if os.path.isfile('./meta_index_type.pkl'): + dataFR1=panda.read_pickle('./meta_index_type.pkl') + dataFR1 = dataFR1[dataFR1.index_name!= index_name] + dataFR1.to_pickle('./meta_index_type.pkl') + + print('') + else: + print('') + + + + if os.path.isfile('./index_uniques.pkl'): + dataFR2=panda.read_pickle('./index_uniques.pkl') + dataFR2 = dataFR2[dataFR2.index_name != index_name] + dataFR2.to_pickle('./index_uniques.pkl') + + print('') + else: + print('') if index_name in self.tables['meta_indexes'].column_by_name('index_name'): self.delete_from('meta_indexes', f'index_name = {index_name}') @@ -745,4 +1302,151 @@ def drop_index(self, index_name): warnings.warn(f'"{self.savedir}/indexes/meta_{index_name}_index.pkl" not found.') self.save_database() - \ No newline at end of file + + + + +class HashIndex: + BUCKET_SIZE = 4 + HASH_SIZE = 64 + + def __init__(self, depth=1, lsb=True): + self.depth = depth + self.lsb = lsb + self.buckets = {} + + def insert(self, key, value): + + ''' + insert: takes a key and a value as input and hashes the + key using either the _hash_lsb or _hash_msb + function (depending on the value of lsb). + If the resulting hash value is already in the buckets + dictionary, it appends the key and value to the corresponding bucket. + if not, it creates a new bucket with the given hash_value and + stores the key and value there. If the length of the bucket exceeds BUCKET_SIZE, + it calls the split_bucket function to split the bucket. + ''' + + if self.lsb: + hash_value = self._hash_lsb(key) + else: + hash_value = self._hash_msb(key) + + if hash_value in self.buckets: + self.buckets[hash_value].append((key, value)) + else: + self.buckets[hash_value] = [(key, value)] + if len(self.buckets[hash_value]) > self.BUCKET_SIZE: + self.split_bucket(hash_value) + + def split_bucket(self, hash_value): + + ''' + split_bucket: takes a hash_value as input and splits the bucket + with the given hash_value. It increments the depth variable + and then rehashes all items in the old bucket using the updated depth value + ''' + self.depth += 1 + old_bucket = self.buckets[hash_value] + del self.buckets[hash_value] + for key, value in old_bucket: + if self.lsb: + new_hash_value = self._hash_lsb(key) + else: + new_hash_value = self._hash_msb(key) + if new_hash_value in self.buckets: + self.buckets[new_hash_value].append((key, value)) + else: + self.buckets[new_hash_value] = [(key, value)] + + def _hash_lsb(self, key): + ''' + _hash_lsb and _hash_msb: functions to hash the key using + either the least significant bits or most significant bits, respectively. + ''' + return hash(key) % (2 ** self.depth) + + def _hash_msb(self, key): + return hash(key) % (2 ** self.depth) + + + + def find(self, key): + ''' + Description: This function looks up the value associated with a given key in a hash index. If the key is a two-tuple, perform a range query and return all values ​​between the start and end of the range. If the start or end is None, return all values ​​less than, greater than, or equal to the specified value respectively. Returns None if the key is not a tuple or does not match any value in the hash index. + + Args: + + key: The key to search for. Can be a tuple of two values ​​for range queries or a single value for exact matches. + return the goods: + + If key is a tuple, returns a list of all values ​​between the start and end of the range, or all values ​​less than, greater than, or equal to the specified value, depending on whether start or end is None. + If the key is a single value, returns the value associated with that key in the hash index, or None if the key is not found. + ''' + if isinstance(key, tuple) and len(key) == 2: + # range query + start, end = key + if start is None: + # if start is None, return all values less than end + result = [] + for bucket in self.buckets.values(): + for k, v in bucket: + if k < end: + result.append(v) + return result + elif end is None: + # if end is None, return all values greater than or equal to start + result = [] + for bucket in self.buckets.values(): + for k, v in bucket: + if k >= start: + result.append(v) + return result + else: + # return all values between start and end + result = [] + for bucket in self.buckets.values(): + for k, v in bucket: + if start <= k < end: + result.append(v) + return result + else: + # exact match + if self.lsb: + hash_value = self._hash_lsb(key) + else: + hash_value = self._hash_msb(key) + + if hash_value in self.buckets: + for k, v in self.buckets[hash_value]: + if k == key: + return v + return None + + def delete(self, key): + ''' + delete: takes a key as input and deletes the pair with the given + key from the hash index. + ''' + if self.lsb: + hash_value = self._hash_lsb(key) + else: + hash_value = self._hash_msb(key) + + if hash_value in self.buckets: + self.buckets[hash_value] = [ + pair for pair in self.buckets[hash_value] if pair[0] != key] + + def get_all(self): + ''' + get_all: returns a list of all key-value pairs in the hash index. + + + + + ''' + pairs = [] + for bucket in self.buckets.values(): + pairs.extend(bucket) + return pairs diff --git a/miniDB/misc.py b/miniDB/misc.py index aefada74..fdd18f3c 100644 --- a/miniDB/misc.py +++ b/miniDB/misc.py @@ -1,14 +1,64 @@ import operator +def between(value,range): + '''implements between functionality + checks if value is between range (limits included) + value: the specific value stored in table we are comparing + range: range of accepted values from between keyword; is string; must contain split_key''' + + split_key='&' # exp: BETWEEN 5 AND 25; + if(split_key not in range): + raise IndexError('Between syntax: BETWEEN "value1 & value2".') + try: # comparing floats-ints + range = [float(x) for x in range.split(split_key)] # splits the between range + float(value) # will work if value we are comparing is float or int + except ValueError: # are we comparing strings? + range = range.split('&') # range input must not include the split character + #print("range:",range[0],range[1]) #DEBUG + if ((value>=range[0] and value<=range[1]) or (value>=range[1] and value<=range[0])): # BETWEEN 5 & 10 == BETWEEN 10 & 5 + return True + else: + return False + +def not_between(value,range): + '''reverse of between, is true when value is outside of range, limits exlcuded (like typical sql)''' + split_key='&' # exp: BETWEEN 5 AND 25; + if(split_key not in range): + raise IndexError('Between syntax: BETWEEN "value1 & value2".') + try: # comparing floats-ints + range = [float(x) for x in range.split(split_key)] # splits the between range + float(value) # will work if value we are comparing is float or int + except ValueError: # are we comparing strings? + range = range.split('&') # range input must not include the split character + #print("range:",range[0],range[1]) #DEBUG + if (not((value>=range[0] and value<=range[1]) or (value>=range[1] and value<=range[0]))): # BETWEEN 5 & 10 == BETWEEN 10 & 5 + return True + else: + return False + +def reverse_operator(op): + '''reverses the operator when we are using NOT in specific condition, works with between and != : = too!''' + return { + '>' : '<=', + '>=' : '<', + '<' : '>=', + '<=' : '>', + '!=' : '=', + '=' : '!=', + 'between' : 'not_between'}.get(op) # specifically not adding not_between : between as it is will be the same as NOT BETWEEN + def get_op(op, a, b): ''' Get op as a function of a and b by using a symbol ''' ops = {'>': operator.gt, '<': operator.lt, + '!=': operator.ne, '>=': operator.ge, '<=': operator.le, - '=': operator.eq} + '=': operator.eq, + 'between': between, # matching between keyword with def between(value,range) + 'not_between' : not_between} # matching not_between keyword with def not_between(value,range) try: return ops[op](a,b) @@ -16,17 +66,24 @@ def get_op(op, a, b): return False def split_condition(condition): + isNOT = False ops = {'>=': operator.ge, '<=': operator.le, + '!=': operator.ne, '=': operator.eq, '>': operator.gt, - '<': operator.lt} + '<': operator.lt, + 'between': between # added between operation + } for op_key in ops.keys(): + if("not" in condition): + condition = condition.replace("not ","") # delete NOT, save detection with bool isNOT and continue + isNOT =True # Not detected = True + #print (condition,isNOT) #debug splt=condition.split(op_key) if len(splt)>1: left, right = splt[0].strip(), splt[1].strip() - if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them. right = right.strip('"') elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw. @@ -34,7 +91,11 @@ def split_condition(condition): if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones) raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.') - + if(isNOT): # if not is detected then reverse the operator logic + op_key = reverse_operator(op_key) + #print("reversed")#debug + #print("OPKEY",op_key)#debug + #print(op_key)#debug return left, op_key, right def reverse_op(op): @@ -46,5 +107,7 @@ def reverse_op(op): '>=' : '<=', '<' : '>', '<=' : '>=', + '!=' : '!=', '=' : '=' }.get(op) + diff --git a/miniDB/table.py b/miniDB/table.py index f5c7d937..33d2e521 100644 --- a/miniDB/table.py +++ b/miniDB/table.py @@ -1,5 +1,6 @@ from __future__ import annotations -from tabulate import tabulate +import pandas as panda +from tabulate import tabulate # prints tables in a nice format import pickle import os import sys @@ -27,6 +28,7 @@ class Table: ''' def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None): + #print("table.py __init__ func RUN") if load is not None: # if load is a dict, replace the object dict with it (replaces the object with the specified one) @@ -67,16 +69,30 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key= else: self.pk_idx = None + #givinga value to anew variablee named unique index in order to use it when creating and showing the table later + if os.path.isfile('./unique_table.pkl'): + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==name) + res=dataFr[searcher] + if res.empty: + self.unique_idx=None + else: + unique_boy=res.iloc[0]['unique_column'] + self.unique_idx=self.column_names.index(unique_boy) + + self.pk = primary_key # self._update() # if any of the name, columns_names and column types are none. return an empty table object def column_by_name(self, column_name): + #print("table.py column_by_name func RUN") return [row[self.column_names.index(column_name)] for row in self.data] def _update(self): + #print("_update RUN") ''' Update all the available columns with the appended rows. ''' @@ -85,6 +101,7 @@ def _update(self): setattr(self, col, self.columns[ind]) def _cast_column(self, column_name, cast_type): + #print("_casr_column func RUN") ''' Cast all values of a column using a specified type. @@ -103,6 +120,7 @@ def _cast_column(self, column_name, cast_type): def _insert(self, row, insert_stack=[]): + #print("_insert func RUN") ''' Insert row to table. @@ -110,6 +128,12 @@ def _insert(self, row, insert_stack=[]): row: list. A list of values to be inserted (will be casted to a predifined type automatically). insert_stack: list. The insert stack (empty by default). ''' + + table_n=row[0].strip("'") + + + + #print(self.unique_idx) if len(row)!=len(self.column_names): raise ValueError(f'ERROR -> Cannot insert {len(row)} values. Only {len(self.column_names)} columns exist') @@ -138,6 +162,7 @@ def _insert(self, row, insert_stack=[]): # self._update() def _update_rows(self, set_value, set_column, condition): + #print("_update_rows func RUN") ''' Update where Condition is met. @@ -223,10 +248,11 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by desc: boolean. If True, order_by will return results in descending order (False by default). limit: int. An integer that defines the number of rows that will be returned (all rows if None). ''' - + #print("_select_where func RUN") # if * return all columns, else find the column indexes for the columns specified if return_columns == '*': return_cols = [i for i in range(len(self.column_names))] + #print (return_cols) else: return_cols = [self.column_names.index(col.strip()) for col in return_columns.split(',')] @@ -270,20 +296,94 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by return s_table - def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None): + def _select_where_with_hashindex(self, return_columns, index, condition, distinct=False, order_by=None, desc=True, limit=None): + + ''' + This function makes the select from where choice using a filter of index type, this function is called if there is an index pressent on the column of the search + Args: + return_columns: list. The columns to be returned. + index:string.Index to be used + condition: string. A condition using the following format: + 'column[<,<=,==,>=,>]value' or + 'value[<,<=,==,>=,>]column'. + + Operatores supported: (<,<=,==,>=,>) + distinct: boolean. If True, the resulting table will contain only unique rows (False by default). + order_by: string. A column name that signals that the resulting table should be ordered based on it (no order if None). + desc: boolean. If True, order_by will return results in descending order (False by default). + limit: int. An integer that defines the number of rows that will be returned (all rows if None). + ''' + if return_columns == '*': + return_cols = [i for i in range(len(self.column_names))] + else: + return_cols = [self.column_names.index(colname) for colname in return_columns] + + column_name, operator, value = self._parse_condition(condition) + + # if the column in condition is not a primary key, abort the select + + + + + # find the rows that match the condition using the hash index + rows = [] + if operator == '=': + column = self.column_by_name(column_name) + + result = index.find(value) + print(result) + if result is not None: + rows.append(result) + else: + # if the operator is not '=', we need to manually search through the index + pairs = index.get_all() + for key, value in pairs: + if get_op(operator, key, value): + rows.append(value) + + try: + k = int(limit) + except TypeError: + k = None + # same as simple select from now on + rows = rows[:k] + # TODO: this needs to be dumbed down + dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else value) for key,value in self.__dict__.items()} + + dict['column_names'] = [self.column_names[i] for i in return_cols] + dict['column_types'] = [self.column_types[i] for i in return_cols] + s_table = Table(load=dict) + + s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data + + if order_by: + s_table.order_by(order_by, desc) + + if isinstance(limit,str): + s_table.data = [row for row in s_table.data if row is not None][:int(limit)] + + return s_table + + def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None): + ''' + The same function as created but with some support when i use index on a unique column + ''' + + #print("I RUN!? BTREE_SELECT") # if * return all columns, else find the column indexes for the columns specified if return_columns == '*': return_cols = [i for i in range(len(self.column_names))] else: + return_cols = [self.column_names.index(colname) for colname in return_columns] + print("return columns"+ return_cols) - + column_name, operator, value = self._parse_condition(condition) # if the column in condition is not a primary key, abort the select - if column_name != self.column_names[self.pk_idx]: - print('Column is not PK. Aborting') + # here we run the same select twice, sequentially and using the btree. # we then check the results match and compare performance (number of operation) @@ -513,7 +613,8 @@ def _full_join(self, table_right: Table, condition): return join_table - def show(self, no_of_rows=None, is_locked=False): + def show(self, no_of_rows=None, is_locked=False,print_output=True): + #print("table.py show func RUN") ''' Print the table in a nice readable format. @@ -533,14 +634,35 @@ def show(self, no_of_rows=None, is_locked=False): if self.pk_idx is not None: # table has a primary key, add PK next to the appropriate column headers[self.pk_idx] = headers[self.pk_idx]+' #PK#' - # detect the rows that are no tfull of nones (these rows have been deleted) - # if we dont skip these rows, the returning table has empty rows at the deleted positions + + #here i wil ldetect when printing table and and aa #uniques tag + if os.path.isfile('./unique_table.pkl'): + dataFr=panda.read_pickle('./unique_table.pkl') + searcher=(dataFr['tab_name']==self._name) + res=dataFr[searcher] + if res.empty: + print('') + + else: + unique_boy1=res.iloc[0]['unique_column'] + + print(unique_boy1) + if self.unique_idx is not None: + headers[self.unique_idx] = headers[self.unique_idx]+' #UNIQUE#' + non_none_rows = [row for row in self.data if any(row)] + + ### x # print using tabulate - print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n') + #print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n') + if(print_output): + print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n') + return headers,non_none_rows + ### def _parse_condition(self, condition, join=False): + #print("[table.py] (_parse_condition) condition:",condition," this is where where happens") ''' Parse the single string condition and return the value of the column and the operator. @@ -561,11 +683,15 @@ def _parse_condition(self, condition, join=False): if left not in self.column_names: raise ValueError(f'Condition is not valid (cant find column name)') coltype = self.column_types[self.column_names.index(left)] - - return left, op, coltype(right) + if(op=='between' or op=='not_between'): + #print("[table.py] (_parse_condition) between detected") + return left, op, str(right) #between condition is always a string, type is handled internaly with between function (misc.py). + else: + return left,op,coltype(right) def _load_from_file(self, filename): + #print("table.py i just _load_from_file") ''' Load table from a pkl file (not used currently). diff --git a/miniDB/testcode.py b/miniDB/testcode.py new file mode 100644 index 00000000..ee9fb22d --- /dev/null +++ b/miniDB/testcode.py @@ -0,0 +1,38 @@ +import string + +letters = string.ascii_lowercase + +def between(value,range): + # checks if value is between range + # value: the specific value stored in table we are comparing + # range: range of accepted values from between keyword; is string; must contain split_condition + split_condition='&' # exp: BETWEEN 5 & 25; + if(split_condition not in range): + raise IndexError('Between syntax: BETWEEN "value1 & value2"') + try: # comparing floats-ints + range = [float(x) for x in range.split('&')] # splits the between range + float(value) # will work if value we are comparing is float or int + except ValueError: # are we comparing strings? + range = range.split('&') # range input must not include the split character + if ((value>=range[0] and value<=range[1]) or (value>=range[1] and value<=range[0])): # BETWEEN 5 & 10 == BETWEEN 10 & 5 + print(value) + return True + else: + return False + +# for i in letters: +# print(i,between(i,"g&d")) + +# print ("!!!") +# if("b"<="bamboo" and "b">="lama"): +# print("it is") +flag = False +range1="5&10" +print("Printing True values") +for i in range (100): + if(not flag): + print("range:",range1) + flag = True + between(i,range1) +# for i in reversed(range(100)): +# print(i,between(i,"25&50")) \ No newline at end of file diff --git a/new_file.txt b/new_file.txt new file mode 100644 index 00000000..a6e25399 --- /dev/null +++ b/new_file.txt @@ -0,0 +1 @@ +This is the content written to the file. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6f8ddd98..5394856f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +pandas tabulate graphviz matplotlib diff --git a/unique_table.pkl b/unique_table.pkl new file mode 100644 index 00000000..21b42222 Binary files /dev/null and b/unique_table.pkl differ