<Kaggle-Course>
-
캐글 data_cleaning/03-parsing-dates<Kaggle-Course> 2023. 3. 16. 18:24
date_format = '%m/%d/%Y' earthquakes.loc[indices,'Date'] = pd.to_datetime(earthquakes.loc[indices,'Date']) \ .dt.strftime(date_format) earthquakes['date_parsed'] = pd.to_datetime(earthquakes['Date']) day_of_month_earthquakes = earthquakes['date_parsed'].dt.day date_lengths = earthquakes.Date.str.len() date_lengths.value_counts()
-
캐글 data_cleaning/02-scaling-and-normalization<Kaggle-Course> 2023. 3. 16. 18:19
스케일링에서는 데이터 범위를 변경하는 반면 정규화에서는 데이터 분포의 모양을 변경합니다. # 스케일링 from mlxtend.preprocessing import minmax_scaling minmax_scaling(X, columns=['x1']) 보통 0~1사이로 스케일링된다. # 정규화 from scipy import stats normalized_data = stats.boxcox(original_data)
-
캐글 advanced_sql/04-writing-efficient-queries<Kaggle-Course> 2023. 3. 15. 19:49
WITH LocationsAndOwners AS ( SELECT * FROM CostumeOwners co INNER JOIN CostumeLocations cl ON co.CostumeID = cl.CostumeID ), LastSeen AS ( SELECT CostumeID, MAX(Timestamp) FROM LocationsAndOwners GROUP BY CostumeID ) SELECT lo.CostumeID, Location FROM LocationsAndOwners lo INNER JOIN LastSeen ls ON lo.Timestamp = ls.Timestamp AND lo.CostumeID = ls.CostumeID WHERE OwnerID = MitzieOwnerID
-
캐글 advanced_sql/02-analytic-functions<Kaggle-Course> 2023. 3. 15. 19:48
trip_number_query = """ SELECT pickup_community_area, trip_start_timestamp, trip_end_timestamp, RANK() OVER ( PARTITION BY pickup_community_area ORDER BY trip_start_timestamp ) AS trip_number FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` WHERE DATE(trip_start_timestamp) = '2017-05-01' """ break_time_query = """ SELECT taxi_id, trip_start_timestamp, trip_end_timestamp, TIMESTAMP_DIFF..
-
캐글 advanced_sql/03-nested-and-repeated-data<Kaggle-Course> 2023. 3. 15. 19:46
pop_lang_query = """ SELECT language.name AS language_name, COUNT(*) AS num_repos FROM `bigquery-public-data.github_repos.languages`, UNNEST(language) AS language GROUP BY language_name ORDER BY num_repos DESC """ all_langs_query = """ SELECT language.name AS name, language.bytes AS bytes FROM `bigquery-public-data.github_repos.languages`, UNNEST(language) AS language WHERE repo_name = 'polyrab..
-
캐글 advanced_sql/01-joins-and-unions<Kaggle-Course> 2023. 3. 15. 16:22
first_query = """ SELECT q.id AS q_id, MIN(TIMESTAMP_DIFF(a.creation_date, q.creation_date, SECOND)) as time_to_answer FROM `bigquery-public-data.stackoverflow.posts_questions` AS q INNER JOIN `bigquery-public-data.stackoverflow.posts_answers` AS a ON q.id = a.parent_id WHERE q.creation_date >= '2018-01-01' and q.creation_date < '2018-02-01' GROUP BY q_id ORDER BY time """ correct_query = """ ..