import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def read_data_from_csv():
    hotels=pd.read_csv('zomato.csv')
    return hotels


def remove_unwanted_columns():
    #DO NOT REMOVE FOLLOWING LINE
    #call read_data_from_csv() function to get dataframe
    hotels=read_data_from_csv()
    hotels=hotels[['name','online_order','book_table','rate','votes','location','rest_type','dish_liked','cuisines','approx_cost(for two people)','listed_in(type)']]
    
    return hotels


def rename_columns():
    #DO NOT REMOVE FOLLOWING LINE
    #call remove_unwanted_columns() function to get dataframe
    hotels = remove_unwanted_columns()
    
    #task2: rename columns,  only these columns are allowed in the dataset
    # 1.	Id
    # 2.	Name
    # 3.	online_order
    # 4.	book_table
    # 5.	rating
    # 6.	votes
    # 7.	location
    # 8.	rest_type
    # 9.	dish_liked
    # 10.	cuisines
    # 11.	approx_cost
    # 12.	type
    hotels.rename(columns={'rate':'rating','approx_cost(for two people)':'approx_cost','listed_in(type)':'type'},inplace=True)
    return hotels


#task3: handle  null values of each column
def null_value_check():
    
    #DO NOT REMOVE FOLLOWING LINE
    #call rename_columns() function to get dataframe
    hotels=rename_columns()
    #deleting null values of name column
    #hotels=hotels.dropna(subset=['name'],axis=0)
    hotels['name'].fillna('NA',inplace=True)
    #handling null values of online_order
    hotels['online_order'].fillna('NA',inplace=True)
    #handling null values of book_table
    hotels['book_table'].fillna('NA',inplace=True)
    #handling null values of rating
    hotels['rating'].fillna('', inplace=True)
    #handling null values of votes
    hotels['votes'].fillna(0, inplace=True)
    #handling null values of location
    hotels['location'].fillna('NA',inplace=True)
    #handling null values of rest_type
    hotels['rest_type'].fillna('NA',inplace=True)
    #handling null values of dish_liked
    hotels['dish_liked'].fillna('NA',inplace=True)
    #handling null values of cuisines
    hotels['cuisines'].fillna('NA',inplace=True)
    #handling null values of approx_cost
    hotels['approx_cost'].fillna(0,inplace=True)
    #handling null values of type
    hotels['type'].fillna('NA',inplace=True)
    return hotels


#task4 #find duplicates in the dataset
def find_duplicates():
    #DO NOT REMOVE FOLLOWING LINE
    #call null_value_check() function to get dataframe
    hotels=null_value_check()
    hotels.drop_duplicates(inplace=True)
    #droping the duplicates value keeping the first
    return hotels


#task5 removing irrelevant text from all the columns
def removing_irrelevant_text():
    #DO NOT REMOVE FOLLOWING LINE
    #call find_duplicates() function to get dataframe
    
    hotels= find_duplicates()
    
    #hotels = hotels[~hotels.astype(str).apply(lambda x: x.str.contains('RATED|rated|Rated')).any(axis=1)]
    for col in hotels.columns:
        hotels = hotels[hotels[col].str.contains('RATED|Rated') == False]
    
    return hotels


#task6: check for unique values in each column and handle the irrelevant values
def check_for_unique_values():
    #DO NOT REMOVE FOLLOWING LINE
    #call removing_irrelevant_text() function to get dataframe
    
    hotels=removing_irrelevant_text()
    hotels = hotels.query("online_order == 'Yes' or online_order == 'No'")
    hotels = hotels.query("book_table == 'Yes' or book_table == 'No'")
    #hotels['rating'] = hotels['rating'].str.replace('[^a-zA-Z0-9\s]', '')
    hotels['rating'] = hotels['rating'].str.replace('/5', '').replace('NEW', 0).replace('-',0).replace(np.nan,0)
   
    return hotels


#task7: remove the unknown character from the dataset and export it to "zomatocleaned.csv"
def remove_the_unknown_character():
    #DO NOT REMOVE FOLLOWING LINE
    #call check_for_unique_values() function to get dataframe
    
    dataframe=check_for_unique_values()
    
    #remove unknown character from dataset
    dataframe['name'] = dataframe['name'].str.replace('[Ãx][^A-Za-z]+','',regex=True)
    
    #export cleaned Dataset to newcsv file named "zomatocleaned.csv"
    dataframe.to_csv('zomatocleaned.csv')
    
    return dataframe

remove_the_unknown_character().to_csv('zomatocleaned.csv')


#check if mysql table is created using "zomatocleaned.csv"
#Use this final dataset and upload it on the provided database for performing analysis in  MySQL
#To Run this task first Run the appliation for Terminal to create table named 'Zomato' and then run test.

def start():
    remove_the_unknown_character()
    
def task_runner():
    start()


import pandas as pd
import sqlite3

#Defining a function which takes SQL queries and prints the output,

def SQL(dataframe_csv,query):
    
    df=dataframe_csv
    
    # create a connection to the database
    conn = sqlite3.connect('zomato.db')
    # convert the DataFrame to a SQLite database table

    df.to_sql('zomato_table', conn, if_exists='replace', index=False, index_label=list(df.columns))

    result_df = pd.read_sql_query(query, conn)
    
    conn.close()
    return result_df



df = pd.read_csv('zomatocleaned.csv') # read the CSV file into a pandas DataFrame

df=df.iloc[:,1:]

query="SELECT * FROM zomato_table LIMIT 10"

SQL(df,query)


query="select name,votes,rating from zomato_table where type='Delivery' order by votes desc limit 5;"

SQL(df,query)


#write your query

query="Select name, rating, location, type from zomato_table where location='Banashankari'and type='Delivery' order by rating desc limit 5;"
SQL(df,query)


query="select (select rating from zomato_table where location='Indiranagar' order by approx_cost limit 1) as rating1, (select rating from zomato_table where location='Indiranagar' order by approx_cost desc limit 1) as rating2 from zomato_table limit 1;"

SQL(df,query)


#write your query

query="SELECT online_order, SUM(votes) as total_votes FROM zomato_table GROUP BY online_order;"
SQL(df,query)


#write your query
query="select type,count(name),sum(votes),avg(rating) from zomato_table group by type having type!='NA' order by sum(votes) desc limit 7;"

SQL(df,query)


#write your query

query="SELECT name, dish_liked, rating, votes FROM zomato_table WHERE rating = (SELECT MAX(rating) FROM zomato_table) AND votes = (SELECT MAX(votes) FROM zomato_table)  LIMIT 1;"

SQL(df,query)


#write your query
query="SELECT name, rating, votes ,online_order from zomato_table WHERE online_order = 'No' AND rating > 3 AND votes >= 150  ORDER BY votes DESC LIMIT 15;"
SQL(df,query)


pd.read_csv('zomatocleaned.csv')


df=pd.read_csv('zomatocleaned.csv')

zomato=pd.DataFrame()
zomato['location']=df['location'].unique()
zomato


# import required libraries
from geopy.geocoders import Nominatim
import pandas as pd
from tqdm import tqdm 

# create a geolocator object
geolocator = Nominatim(user_agent="zomato_app")

# define a function to get the latitude and longitude of a location
def get_lat_long(location): 
    try:
        # use geolocator to get the latitude and longitude of the location
        geo_location = geolocator.geocode(location+", Bangalore")
        return geo_location.latitude, geo_location.longitude
    except:
        return None, None

    
latitude_list = []
longitude_list = []

for location in tqdm(zomato['location']):
    latitude, longitude = get_lat_long(location)
    latitude_list.append(latitude)
    longitude_list.append(longitude)

zomato['latitude'] = latitude_list
zomato['longitude'] = longitude_list

# print the updated dataframe
zomato

100%|██████████████████████████████████████████████████████████████████████████████████| 93/93 [00:48<00:00,  1.91it/s]


df = df.merge(zomato[['location', 'longitude','latitude']], left_on='location', right_on='location', how='left')
df=df.iloc[:,1:]
df.to_csv('zomato1.csv',index=False)
df.drop_duplicates(inplace=True)


x=pd.DataFrame()
x['dish_liked']=df['dish_liked'].value_counts().index
x['count']=df['dish_liked'].value_counts().values


dish_liked = x['dish_liked']

# Split the strings into lists
dish_liked = dish_liked.str.split(', ')

# Explode the lists into separate rows
dish_liked = dish_liked.explode()

# Count the occurrences of each item

counts = dish_liked.value_counts()

Dishes=pd.DataFrame()
Dishes["Dish"]=counts.index
Dishes['Count']=counts.values


Dishes


Dishes.to_csv('dishes_liked.csv')


y=pd.DataFrame()

y['rest_type']=df['rest_type'].value_counts().index
y['count']=df['rest_type'].value_counts().values

rest_type = y['rest_type']

# Split the strings into lists
rest_type = rest_type.str.split(', ')

# Explode the lists into separate rows
rest_type = rest_type .explode()


# Count the occurrences of each item

counts = rest_type .value_counts()

Rest_type=pd.DataFrame()
Rest_type["Rest_type"]=counts.index
Rest_type['Count']=counts.values

rest_type.to_csv('rest_type.csv',index=False)


pd.read_csv('rest_type.csv')


z=pd.DataFrame()

z['cuisines']=df['cuisines'].value_counts().index
z['count']=df['cuisines'].value_counts().values

cuisines = z['cuisines']

# Split the strings into lists
cuisines = cuisines.str.split(', ')

# Explode the lists into separate rows
cuisines = cuisines.explode()


# Count the occurrences of each item

counts = cuisines.value_counts()

cuisines=pd.DataFrame()
cuisines["cuisines"]=counts.index
cuisines['Count']=counts.values

cuisines.to_csv('cuisines.csv',index=False)


pd.read_csv('cuisines.csv')

	name	online_order	book_table	rating	votes	location	rest_type	dish_liked	cuisines	approx_cost	type
0	Jalsa	Yes	Yes	4.1	775	Banashankari	Casual Dining	Pasta, Lunch Buffet, Masala Papad, Paneer Laja...	North Indian, Mughlai, Chinese	800	Buffet
1	Spice Elephant	Yes	No	4.1	787	Banashankari	Casual Dining	Momos, Lunch Buffet, Chocolate Nirvana, Thai G...	Chinese, North Indian, Thai	800	Buffet
2	San Churro Cafe	Yes	No	3.8	918	Banashankari	Cafe, Casual Dining	Churros, Cannelloni, Minestrone Soup, Hot Choc...	Cafe, Mexican, Italian	800	Buffet
3	Addhuri Udupi Bhojana	No	No	3.7	88	Banashankari	Quick Bites	Masala Dosa	South Indian, North Indian	300	Buffet
4	Grand Village	No	No	3.8	166	Basavanagudi	Casual Dining	Panipuri, Gol Gappe	North Indian, Rajasthani	600	Buffet
5	Timepass Dinner	Yes	No	3.8	286	Basavanagudi	Casual Dining	Onion Rings, Pasta, Kadhai Paneer, Salads, Sal...	North Indian	600	Buffet
6	Rosewood International Hotel - Bar & Restaurant	No	No	3.6	8	Mysore Road	Casual Dining	None	North Indian, South Indian, Andhra, Chinese	800	Buffet
7	Onesta	Yes	Yes	4.6	2556	Banashankari	Casual Dining, Cafe	Farmhouse Pizza, Chocolate Banana, Virgin Moji...	Pizza, Cafe, Italian	600	Cafes
8	Penthouse Cafe	Yes	No	4.0	324	Banashankari	Cafe	Pizza, Mocktails, Coffee, Nachos, Salad, Pasta...	Cafe, Italian, Continental	700	None
9	Smacznego	Yes	No	4.2	504	Banashankari	Cafe	Waffles, Pasta, Coleslaw Sandwich, Choco Waffl...	Cafe, Mexican, Italian, Momos, Beverages	550	Cafes

	name	votes	rating
0	Empire Restaurant	8286	4.1
1	Church Street Social	7544	4.3
2	Meghana Foods	7233	4.4
3	Empire Restaurant	6948	4.1
4	Empire Restaurant	4884	4.4

	type	count(name)	sum(votes)	avg(rating)
0	Delivery	15014	2739517	3.464533
1	Dine-out	11626	2625344	3.476512
2	Drinks & nightlife	580	447949	3.817133
3	Buffet	517	436637	3.877583
4	Cafes	1018	347871	3.643672
5	Desserts	2291	264266	3.586687
6	Pubs and bars	421	251245	3.758435

	name	rating	votes	online_order
0	Toit	4.7	14956	No
1	Toit	4.7	14956	No
2	Truffles	4.7	14726	No
3	Truffles	4.7	14723	No
4	Truffles	4.7	14717	No
5	Truffles	4.7	14710	No
6	Truffles	4.7	14704	No
7	Truffles	4.7	14694	No
8	Truffles	4.7	14690	No
9	Truffles	4.7	14654	No
10	Truffles	4.7	14654	No
11	Truffles	4.7	14654	No
12	AB's - Absolute Barbecues	4.8	12121	No
13	The Black Pearl	4.7	10550	No
14	The Black Pearl	4.7	10547	No

	Unnamed: 0	name	online_order	book_table	rating	votes	location	rest_type	dish_liked	cuisines	approx_cost	type
0	0	Jalsa	Yes	Yes	4.1	775	Banashankari	Casual Dining	Pasta, Lunch Buffet, Masala Papad, Paneer Laja...	North Indian, Mughlai, Chinese	800	Buffet
1	1	Spice Elephant	Yes	No	4.1	787	Banashankari	Casual Dining	Momos, Lunch Buffet, Chocolate Nirvana, Thai G...	Chinese, North Indian, Thai	800	Buffet
2	2	San Churro Cafe	Yes	No	3.8	918	Banashankari	Cafe, Casual Dining	Churros, Cannelloni, Minestrone Soup, Hot Choc...	Cafe, Mexican, Italian	800	Buffet
3	3	Addhuri Udupi Bhojana	No	No	3.7	88	Banashankari	Quick Bites	Masala Dosa	South Indian, North Indian	300	Buffet
4	4	Grand Village	No	No	3.8	166	Basavanagudi	Casual Dining	Panipuri, Gol Gappe	North Indian, Rajasthani	600	Buffet
...	...	...	...	...	...	...	...	...	...	...	...	...
34048	56247	Best Brews - Four Points by Sheraton Bengaluru...	No	No	3.6	27	Whitefield	Bar	NaN	Continental	1,500	Pubs and bars
34049	56248	Vinod Bar And Restaurant	No	No	NaN	0	Whitefield	Bar	NaN	Finger Food	600	Pubs and bars
34050	56249	Plunge - Sheraton Grand Bengaluru Whitefield H...	No	No	NaN	0	Whitefield	Bar	NaN	Finger Food	2,000	Pubs and bars
34051	56250	Chime - Sheraton Grand Bengaluru Whitefield Ho...	No	Yes	4.3	236	ITPL Main Road, Whitefield	Bar	Cocktails, Pizza, Buttermilk	Finger Food	2,500	Pubs and bars
34052	56251	The Nest - The Den Bengaluru	No	No	3.4	13	ITPL Main Road, Whitefield	Bar, Casual Dining	NaN	Finger Food, North Indian, Continental	1,500	Pubs and bars

Write an SQL queries to solve the problem statements.¶

Carrying out further processing on our cleaned dataset to make it suitable for our analysis.¶

If we try to get longitude and latitude with geopy library by taking each of the value from each cell in locations column, it will take too much time. So, we will¶

	location	latitude	longitude
0	Banashankari	12.915221	77.573598
1	Basavanagudi	12.941726	77.575502
2	Mysore Road	12.946662	77.530090
3	Jayanagar	12.929273	77.582423
4	Kumaraswamy Layout	12.908149	77.555318
...	...	...	...
88	West Bangalore	12.984852	77.540063
89	Magadi Road	12.975608	77.555356
90	Yelahanka	13.100698	77.596345
91	Sahakara Nagar	13.062147	77.580061
92	Peenya	13.032942	77.527325

	Dish	Count
0	Pasta	681
1	Burgers	618
2	Pizza	567
3	Cocktails	531
4	Biryani	458
...	...	...
2785	Chicken Pepper Fry	1
2786	Patata Bravas	1
2787	Coffee Toffee	1
2788	Mala Chicken	1
2789	Papaya Juice	1

	cuisines	Count
0	North Indian	1205
1	Chinese	1023
2	Fast Food	554
3	Continental	541
4	South Indian	495
...	...	...
102	Sindhi	1
103	Russian	1
104	Bohri	1
105	Cantonese	1
106	Malwani	1

	rest_type
0	Quick Bites
1	Casual Dining
2	Cafe
3	Delivery
4	Dessert Parlor
...	...
158	Quick Bites
159	Dessert Parlor
160	Food Court
161	Sweet Shop
162	Dessert Parlor