3.7.0 (v3.7.0:1bf9cc5093, Jun 26 2018, 23:26:24) [Clang 6.0 (clang-600.0.57)] Python Type "help", "copyright", "credits" or "license" for more information. # Review: What is a database 'query'? # A database query is a command in the SQL language that asks to get # some information from the database. # Before we run some queries, let's connect to the database import sqlite3 DB = 'weather.db' # the database name con = sqlite3.connect(DB) cur = con.cursor() # make the cursor # We've now made a connection to the database # We can now make some queries # Note that we already have a database file that we're going to query # If we didn't have it, we'd have to create the database like # we did in the previous lectures. # The first query we will make: Get all the rows and columns # Two things we need: # 1. The SQL langauge query # The SQL keyword that allows us to get the data from # a table in the database is 'SELECT'. # It's up to us to tell sql which rows/columns to select. # Let's say we want to get all of the rows and columns. # First, we have to specify which columns we want to select. # If we want all of the columns, we say 'SELECT *' # Next, we specify which table in the database to select from. # (databases can have multiple tables) # We tell sql which table to select from using the 'FROM' keyword # For example, if we want to select from the precipitation table, # we say 'FROM precipitation' # Now, if we put it together, we get: # 'SELECT * FROM precipitation' # What about the rows? # By default, if you don't specify which rows you want somehow, # sql gives you *all* of the rows in the table. # So let's run the query 'SELECT * FROM precipitation' all_rows_all_columns_query = 'SELECT * FROM precipitation' # ^ we can save the query as a string # and now, we can execute the query using the cursor. cur.execute(all_rows_all_columns_query) # After executing the query, the cursor contains the data for the # rows that we selected. # To get all of the rows we selected, we use cur.fetchall() cur.fetchall() [("St.John's", 322.1, 1482, 217), ('Charlottetown', 338.7, 1201, 177), ('Halifax', 261.4, 1474, 170), ('Fredericton', 294.5, 1131, 156), ('Quebec', 337.0, 1208, 178), ('Montreal', 214.2, 940, 162), ('Ottawa', 221.5, 911, 159), ('Toronto', 135.0, 819, 139), ('Winnipeg', 114.8, 504, 119), ('Regina', 107.4, 364, 109), ('Edmonton', 129.6, 461, 123), ('Calgary', 135.4, 399, 111), ('Vancouver', 54.9, 1167, 164), ('Victoria', 46.9, 858, 153), ('Whitehorse', 145.2, 269, 122), ('Yellowknife', 143.9, 267, 118)] # The data that we back from querying the database is a list of # tuples, where each tuple contains the data for one row in the # database. cur.fetchall() [] # Once you read from fetchall(), you can't go back # So save your data to a variable # We'll have to execute the query again first: cur.execute(all_rows_all_columns_query) data_for_all_rows = cur.fetchall() data_for_all_rows [("St.John's", 322.1, 1482, 217), ('Charlottetown', 338.7, 1201, 177), ('Halifax', 261.4, 1474, 170), ('Fredericton', 294.5, 1131, 156), ('Quebec', 337.0, 1208, 178), ('Montreal', 214.2, 940, 162), ('Ottawa', 221.5, 911, 159), ('Toronto', 135.0, 819, 139), ('Winnipeg', 114.8, 504, 119), ('Regina', 107.4, 364, 109), ('Edmonton', 129.6, 461, 123), ('Calgary', 135.4, 399, 111), ('Vancouver', 54.9, 1167, 164), ('Victoria', 46.9, 858, 153), ('Whitehorse', 145.2, 269, 122), ('Yellowknife', 143.9, 267, 118)] for row in data_for_all_rows: print(row) ("St.John's", 322.1, 1482, 217) ('Charlottetown', 338.7, 1201, 177) ('Halifax', 261.4, 1474, 170) ('Fredericton', 294.5, 1131, 156) ('Quebec', 337.0, 1208, 178) ('Montreal', 214.2, 940, 162) ('Ottawa', 221.5, 911, 159) ('Toronto', 135.0, 819, 139) ('Winnipeg', 114.8, 504, 119) ('Regina', 107.4, 364, 109) ('Edmonton', 129.6, 461, 123) ('Calgary', 135.4, 399, 111) ('Vancouver', 54.9, 1167, 164) ('Victoria', 46.9, 858, 153) ('Whitehorse', 145.2, 269, 122) ('Yellowknife', 143.9, 267, 118) # After executing a query, the cursor will let you access the data # for that query. If you run another query, the previous data # will be overwritten. # So in general, save what you get from fetchall() into a variable # so that you don't lose your data. # Now, let's do an example where we select only certain rows. # To specify the rows we want to select, we have to tell sql # what kind of rows we're looking for. # We specify these rows based on some condition(s) # These conditions are specified based on the data in the columns # of the table. # For example, we can look at the column 'days', and put a condition # on this column. # 'days > 180' Only give back rows where the column days # contains a value greater than 180. # In the SQL query, we specify a condition using the 'WHERE' keyword # in this case: 'WHERE days > 180' # All we have to do is stick this condition on the end of the query # 'SELECT * FROM precipitation WHERE days > 180' # In english: Select all columns from the precipitation table where # and the rows where the 'days' value is greater than 180. days_180_query = 'SELECT * FROM precipitation WHERE days > 180' cur.execute(days_180_query) days_180_data = cur.fetchall() days_180_data [("St.John's", 322.1, 1482, 217)] # We get back a list of tuples with the rows that match our query # In this case, only one row matched the query # Notice, we still got all four columns, because we specifed to # select all columns using the *. days_180_data[0] ("St.John's", 322.1, 1482, 217) days_180_data[0][1] 322.1 # Let's say we only want the 'city' and 'snow' columns # Again, we specify the columns right after the SELECT keyword # In this case, 'SELECT city, snow' # Q: What is the query for selecting the columns city and snow and # all of the rows? # A: 'SELECT city, snow FROM precipitation' city_snow_query = 'SELECT city, snow FROM precipitation' cur.execute(city_snow_query) city_snow_data = cur.fetchall() city_snow_data [("St.John's", 322.1), ('Charlottetown', 338.7), ('Halifax', 261.4), ('Fredericton', 294.5), ('Quebec', 337.0), ('Montreal', 214.2), ('Ottawa', 221.5), ('Toronto', 135.0), ('Winnipeg', 114.8), ('Regina', 107.4), ('Edmonton', 129.6), ('Calgary', 135.4), ('Vancouver', 54.9), ('Victoria', 46.9), ('Whitehorse', 145.2), ('Yellowknife', 143.9)] # Q. What is the query for selecting the columns city and day, and # only the rows where the days value is less than 150? # A. 'SELECT city, days FROM precipitation WHERE days < 150' city_days_150_query = 'SELECT city, days FROM precipitation WHERE days < 150' cur.execute(city_days_150_query) city_days_150_data = cur.fetchall() city_days_150_data [('Toronto', 139), ('Winnipeg', 119), ('Regina', 109), ('Edmonton', 123), ('Calgary', 111), ('Whitehorse', 122), ('Yellowknife', 118)] for row in city_days_150_data: print(row[0]) Toronto Winnipeg Regina Edmonton Calgary Whitehorse Yellowknife