Add a new column to a DataFrame and assign the same value to every row.
df['col'] = val
def make_matrix(df, columns_from='from', columns_to='to'):
    df = df.copy()
    df['value'] = 1
    matrix = df.pivot_table(
        index=columns_from, columns=columns_to, values='value')
    matrix = matrix.fillna(0)
    # .as_matrix() was removed from pandas; use .to_numpy() instead
    return matrix.to_numpy()
make_matrix(df)
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_csv('data.csv')
#df.set_index('date')
#data = pd.read_csv('data.csv')
#data.set_index('date')
#plt.plot(data)
#plt.plot(df)
#plt.show()
#write the figure out to an image file
#fig = plt.figure()
#fig.savefig('fig.png')
#plt.show()
#arrange two plots side by side
#fig, ax1 = plt.subplots(2)
#ax1[0].plot(df)
#ax1[1].plot(df)
#plt.show()
#smooth the line
#x = df.index
#y = df['Ranking']
#plt.plot
import openpyxl
def separate():
    wb = openpyxl.load_workbook('test.xlsx')
    ws = wb.worksheets[0]
    for row in range(2, ws.max_row + 1):
        # sheet titles must be strings
        date = str(ws.cell(row=row, column=3).value)
        # create one sheet per date and append the row's values to it
        if date not in wb.sheetnames:
            wb.create_sheet(title=date)
        wb[date].append([cell.value for cell in ws[row]])
    wb.save('test_out.xlsx')
if __name__ == '__main__':
    separate()
def pivot(df):
    return df.pivot(index='ID', columns='cluster')
def bmi(data):
    # data is a dict with the keys 'height' (m) and 'weight' (kg)
    # print the contents of the dict
    for key, value in data.items():
        print(key, value)
    # pull out the height and weight entries
    height = data['height']
    weight = data['weight']
    # compute the BMI
    bmi_value = weight / (height ** 2)
    return bmi_value
data = {'height': 2.0, 'weight': 80.5}
bmi(data)
from openpyxl import load_workbook, Workbook
wb = load_workbook('output.xlsx')
ws = wb.active
def add_sheet(wb, header, data):
    ws = wb.create_sheet(str(header))
    ws.append(['日付', '商品名', '数量'])
    for row in data:
        ws.append(row)
def group_data(ws):
    data = {}
    for row in ws.rows:
        # skip the header row
        if row[0].value == '日付':
            continue
        row_data = [cell.value for cell in row]
        if row[0].value not in data:
            data[row[0].value] = [row_data]
        else:
            data[row[0].value].append(row_data)
    return data
data = group_data(ws)
for header, rows in data.items():
    add_sheet(wb, header, rows)
import requests
def get_page_view(page_name):
    url = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/ja.wikipedia/all-access/all-agents/{page_name}/monthly/20190101/20190131'.format(page_name=page_name)
    r = requests.get(url)
    return r.json()['items'][0]['views']
get_page_view('ページ名')
import openpyxl
from openpyxl import Workbook
def copy_sheet(data_set, sheet_name):
    wb = openpyxl.load_workbook(data_set)
    sheet = wb.active
    # find the column whose header in row 1 matches sheet_name
    col_num = 1
    for col in sheet['1:1']:
        for col_name in col:
            if col_name.value == sheet_name:
                col_num = col_name.col_idx
                break
    # copy the first worksheet, rename the copy and keep only its header row
    ws = wb.copy_worksheet(wb.worksheets[0])
    ws.title = '2020-01-01'
    ws.delete_rows(2, ws.max_row - 1)
    # copy the rows whose date matches into the new sheet
    row_num = 1
    for row in sheet.iter_rows(min_row=2, min_col=1, max_col=sheet.max_column):
        date = row[col_num - 1].value
        if date == '2020-01-01':
            row_num = row_num + 1
            for row_cell in row:
                ws.cell(row=row_num, column=row_cell.col_idx, value=row_cell.value)
    wb.save(data_set)
import re
def get_period_dates(string):
    pattern = re.compile('調査対象期間:([0-9]{4}年[0-9]{1,2}月[0-9]{1,2}日〜[0-9]{4}年[0-9]{1,2}月[0-9]{1,2}日)')
    s = pattern.search(string)
    if s:
        return s.group(1)
    else:
        return ''
import osmnx as ox
def get_street_network(point, distance):
    # uses the pre-1.0 osmnx API, where graph_from_point/footprints_from_point take a distance in metres
    G = ox.graph_from_point(point, distance, network_type='drive')
    area = ox.project_gdf(ox.footprints_from_point(point, distance=distance, footprint_type='building'))
    area = area.unary_union.area
    return G, area
import urllib.request
from bs4 import BeautifulSoup
def get_rank(url, tag, attribute, attribute_name, index_start, index_end):
    html = urllib.request.urlopen(url).read()
    soup = BeautifulSoup(html, "html.parser")
    rank = []
    for i in range(index_start, index_end):
        element = soup.select(tag)[i]
        rank.append(element[attribute])
    return rank
rank = get_rank(url, 'tr', 'class', '', 22, 27)
def get_str_with_regexp(df, col, regexp):
    return df[col][df[col].str.contains(regexp)]
def dict_to_col(dict_data):
    df_data = pd.DataFrame(dict_data)
    return df_data.T.reset_index()
def get_date(date_str):
    # skip past the whole label '調査対象期間:' (including the colon)
    start_index = date_str.find('調査対象期間:') + len('調査対象期間:')
    end_index = date_str.find('〜')
    date_str_without_symbol = date_str[start_index:end_index]
    date_list = date_str_without_symbol.split('/')
    date_list_with_zero = []
    for date in date_list:
        if len(date) == 1:
            date_list_with_zero.append('0' + date)
        else:
            date_list_with_zero.append(date)
    return '-'.join(date_list_with_zero)
def utc_to_jst(df, utc_col):
    df[utc_col] = df[utc_col].dt.tz_convert("Asia/Tokyo")
    return df
import openpyxl
def split_excel(filepath):
    # open the Excel file
    wb = openpyxl.load_workbook(filepath)
    # get the active sheet
    ws = wb.active
    # group the rows by the date in the third column
    grouped = {}
    for row in ws.iter_rows(min_row=2, max_row=ws.max_row):
        date = row[2].value
        if date not in grouped:
            grouped[date] = []
        grouped[date].append([cell.value for cell in row])
    # delete the grouped rows from the original sheet
    ws.delete_rows(2, ws.max_row - 1)
    # write each group of rows to a new sheet named after the date
    for date, rows in grouped.items():
        new_ws = wb.create_sheet(title=str(date))
        for values in rows:
            new_ws.append(values)
def replace(string):
    return string.replace('/', '-')
replace('test/test')
def get_utc_from_utcplus9(time):
    return time.tz_convert('UTC')
def convert_df_to_dict(df, key1, key2):
    # take the key1 and key2 columns, turn each row into a tuple,
    # and collect the tuples in a list
    list_tuple = [tuple(x) for x in df[[key1, key2]].values]
    # convert list_tuple into a dict with key and value swapped
    dict_key_value = dict((y, x) for x, y in list_tuple)
    return dict_key_value
import re
def get_date_string(df):
    pattern = r'\:.+〜'
    return re.findall(pattern, df)[0]
import openpyxl
#use openpyxl
def openpyxlで開く(ファイル名):
    wb = openpyxl.load_workbook(ファイル名)
    return wb
#open the loaded file
wb = openpyxlで開く('ファイル名.xlsx')
ws = wb.active
#check that the third column holds dates
for i in range(2, ws.max_row + 1):
    print(ws.cell(row=i, column=3).value)
#group the row numbers by date
days = {}
for i in range(2, ws.max_row + 1):
    date = ws.cell(row=i, column=3).value
    if date not in days:
        days[date] = []
    days[date].append(i)
from bs4 import BeautifulSoup
import requests
url = "https://tv.so-net.ne.jp/chart/7/"
html = requests.get(url)
soup = BeautifulSoup(html.content, "html.parser")
# BeautifulSoup's select() takes a CSS selector, not an XPath expression
print(soup.select('#rankingChart > tbody > tr:nth-of-type(22) > td'))
def get_related_artists(artist_id):
    results = sp.artist_related_artists(artist_id)
    artists = results['artists']
    num_artists = len(artists)
    count = 0
    while num_artists < 50 and count < 5:
        # call the function again
        results = sp.artist_related_artists(artist_id)
        artists.extend(results['artists'])
        num_artists = len(artists)
        count += 1
    return artists
import numpy as np
def adjacency_matrix(df, col_id, col_value):
    # create an adjacency matrix (note: only diagonal entries are filled here)
    mat = np.zeros((len(df[col_id].unique()), len(df[col_id].unique())))
    for i, row in df.iterrows():
        mat[row[col_id]][row[col_id]] = row[col_value]
    return mat
def get_tweet_text(url):
    html = requests.get(url)
    soup = BeautifulSoup(html.text, "html.parser")
    text = soup.select_one("#rankingChart > tbody > tr:nth-child(22) > td")
    return text.get_text()
x = ['\r\n\t\t\t実査日: 2022年8月20日\u3000調査対象期間:2022年8月13日~8月19日']
x[0].split('調査対象期間:')[1]
import requests
import bs4
def get_string_by_xpath(url, x_path):
    res = requests.get(url)
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    # despite the parameter name, select() expects a CSS selector
    return soup.select(x_path)
df = pd.read_csv('your_file.csv', dtype='Int64')
def get_top_n_rank(df, col, n=5):
    return df[col].value_counts().iloc[:n]
def get_date(s):
    # skip past the whole label '調査対象期間:' (including the colon)
    a = s.find("調査対象期間:") + len("調査対象期間:")
    b = s.find("〜")
    return s[a:b]
get_date("調査対象期間:2017年2月10日〜2017年2月16日")
from pageviewapi import period, wikimedia
wikimedia.article_views('ja.wikipedia', '特定のWikipediaページの', granularity='daily',
                        access='all-access', agent='all-agents')
from openpyxl import load_workbook
import openpyxl
def filter(filename):
    wb = load_workbook(filename)
    print(wb.sheetnames)
    sheet = wb['Sheet1']
    new_wb = openpyxl.Workbook()
    new_ws = new_wb.active
    new_ws.title = 'new'
    for row in sheet.values:
        if row[0] == 'Japan':
            new_ws.append(row)
    new_wb.save('out.xlsx')
filter('sample.xlsx')
data = pd.read_csv('data.csv', dtype={'col1': np.int64, 'col2': np.int64})
def union(a, b):
    a = set(a)
    b = set(b)
    c = a.union(b)
    return list(c)
union([1, 2, 3, 4], [3, 4, 5, 6])
def make_adj_matrix(dataset, rows, columns):
    adj_matrix = []
    for row_id in range(rows):
        adj_matrix.append([])
        for column_id in range(columns):
            adj_matrix[row_id].append(dataset[row_id][column_id])
    return adj_matrix
def csv_to_list(file, col_name):
    col_list = []
    for row in file:
        col_list.append(row[col_name])
    return col_list
def make_adj_matrix(df):
    df = df.copy()
    df['weight'] = 1.0
    df = df.pivot(index='node1_id', columns='node2_id', values='weight')
    return df.fillna(0)
def my_max(a, b, c):
    if a >= b and a >= c:
        return a
    if b >= a and b >= c:
        return b
    return c
my_max(1, 2, 3)
from pageviewapi import Period, Api
def get_page_views(page_title, date):
    return Api.pageviews(
        project='ja.wikipedia',
        pageviews={'per-article': {page_title: [date]}},
        agent='user',
        granularity='hourly',
        access='all-access',
        date_range=[date, date]
    )[date][page_title]['views']
get_page_views('機械学習', '20200401')
from urllib.request import urlopen
from bs4 import BeautifulSoup
from urllib.error import HTTPError
def get_url(url):
    try:
        html = urlopen(url)
    except HTTPError:
        return None
    try:
        bsObj = BeautifulSoup(html.read(), 'lxml')
        #bsObj = BeautifulSoup(html.read(), 'html.parser')
    except AttributeError:
        return None
    return bsObj.find("div", {"id": "rso"}).find("div", {"class": "g"}).find("div", {"class": "rc"}).find("a").get('href')
def change(text):
    return text.replace('/', '-')
change('https://note.com/tsurezure_cat/n/n8bfbb22dd9ae')
import datetime
import openpyxl
def split_data_by_date(file_name):
    wb = openpyxl.load_workbook(file_name)
    sheet = wb.active
    date_list = []
    date_data = {}
    # collect the distinct dates in column C together with their rows
    for row in sheet.iter_rows(min_row=1, max_row=sheet.max_row, min_col=3, max_col=3):
        if row[0].value not in date_list:
            date_list.append(row[0].value)
            date_data[row[0].value] = []
        date_data[row[0].value].append(row)
    # new workbook with a sheet listing the dates
    new_wb = openpyxl.Workbook()
    new_wb.create_sheet(title='date list')
    new_sheet = new_wb['date list']
    for index, date in enumerate(date_list):
        new_sheet.cell(row=index + 1, column=1, value=date)
    return new_wb
def pivot(df):
    return df.pivot(index='index', columns='date')
import numpy as np
def adj_matrix(df):
    n = len(df)
    A = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            A[i, j] = 1 if df.loc[i][0] == df.loc[j][0] or df.loc[i][1] == df.loc[j][1] else 0
    return A
def adj_matrix(df, row, col):
    return df.pivot(row, col)
import re
def extract_date(text):
    # use a raw string so the \d escapes reach the regex engine unchanged
    m = re.search(r"調査対象期間:(\d{4}/\d{2}/\d{2}〜\d{4}/\d{2}/\d{2})", text)
    if m:
        found = m.group(1)
        return found
    else:
        return "not found"
text = "調査対象期間:2016/10/01〜2016/10/16"
extract_date(text)
# import wordcloud
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# generate a word cloud from the variable text
fpath = "C:/Windows/Fonts/meiryo.ttc"
wordcloud = WordCloud(background_color="black", font_path=fpath, width=900, height=500).generate(text)
# display the image
plt.figure(figsize=(15, 12))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
df = pd.read_csv("data.csv", dtype={"id": np.int32})
def move_dir(path):
    # %cd is an IPython line magic, so this only runs under IPython/Jupyter
    %cd {path}
def transpose_df(df):
    """
    Transpose the dataframe.
    """
    df = df.T
    return df
transpose_df(df)
def _make_adjacency_matrix(df, number, thres=0, normalize=False):
    num_max = df.shape[0]
    result = np.zeros((num_max, num_max), dtype=np.float32)
    for i in range(num_max):
        for j in range(num_max):
            result[i][j] = abs(df.iloc[i, number] - df.iloc[j, number])
    result[result < thres] = 0
    if normalize:
        result[result > 0] = 1
    return result
import datetime
from openpyxl import load_workbook, Workbook
wb = load_workbook(filename='sample.xlsx')
ws = wb.active
wbnew = Workbook()
# create one sheet per distinct date in column C and copy the rows into it
sheets = {}
for row in ws.iter_rows(min_row=2, max_row=ws.max_row, min_col=1, max_col=3):
    # convert the Excel value in column C to a datetime if it is stored as text
    date = row[2].value
    if not isinstance(date, datetime.datetime):
        date = datetime.datetime.strptime(str(date), '%Y-%m-%d %H:%M:%S')
    title = date.strftime('%Y%m%d')
    if title not in sheets:
        wsnew = wbnew.create_sheet(index=0, title=title)
        wsnew.append([cell.value for cell in ws[1]])  # copy the header row
        sheets[title] = wsnew
    sheets[title].append([cell.value for cell in row])
import re
# 実査日: 2022年8月20日 調査対象期間:2022年8月13日 ~ 8月19日
# ['\r\n\t\t\t実査日: 2022年8月20日\u3000調査対象期間:2022年8月13日~8月19日']
def get_exam_period(sentence):
    # compile the regular expression (accept full- or half-width colon and either '〜' or '~')
    pattern = re.compile(r'調査対象期間[::](.*?)[〜~](.*)$')
    # pull the dates out of the string
    m = pattern.search(sentence)
    if m is None:
        return None, None
    start = m.group(1)
    end = m.group(2)
    return start, end
import pandas as pd
def replace_all(df):
    # convert European-style numbers ('1.234,56') in string columns to floats
    for c in df.columns:
        if type(df[c][0]) is str:
            df[c] = df[c].str.replace('.', '', regex=False).str.replace(',', '.', regex=False).astype('float')
    return df
replace_all(df)
from pandas import DataFrame
from copy import deepcopy
def copy(df):
    return deepcopy(df)
The function get_survey_period takes a string text as the argument and returns the period of the survey.
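A minimal sketch of such a function, assuming (as in the other snippets in this list) that the period follows the label 調査対象期間: and ends at 〜:
def get_survey_period(text):
    # assumes the period follows '調査対象期間:' and ends at '〜'
    start = text.find('調査対象期間:') + len('調査対象期間:')
    end = text.find('〜', start)
    return text[start:end]
get_survey_period('調査対象期間:2022年8月13日〜8月19日')  # '2022年8月13日'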
# replace a substring in every column
def replace_all(df, old, new):
    for col in df.columns:
        df[col] = df[col].str.replace(old, new)
    return df
replace_all(df, "old", "new")
import numpy as np
def make_adjacent_matrix(df, col0, col1):
    n = df.shape[0]
    matrix = np.zeros((n, n))
    for i, r0 in enumerate(df[col0]):
        for j, r1 in enumerate(df[col1]):
            if r0 == r1:
                matrix[i, j] = 1
    return matrix
make_adjacent_matrix(df, "from", "to")
from openpyxl import load_workbook
def extract():
    wb = load_workbook("sample.xlsx")
    sheetnames = wb.sheetnames
    sheet = wb[sheetnames[0]]
    for row in sheet.rows:
        if row[0].value == 'Japan':
            print(row[0].value)
            print(row[1].value)
extract()
import matplotlib.pyplot as plt
def plot_graph(x, y):
    plt.plot(x, y)
    plt.show()
x = [10, 20, 30, 40, 50, 60]
y = [1, 2, 3, 4, 5, 6]
plot_graph(x, y)
def make_adjacency_matrix(data):
    import pandas as pd
    import numpy as np
    data = np.array(data)
    adjacency_matrix = np.dot(data, data.T)
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    adjacency_matrix = pd.DataFrame(adjacency_matrix)
    return adjacency_matrix
def concat_v(df, df2):
    return pd.concat([df, df2], axis=0)
concat_v(df, df2)
df.rename(columns={
    'col_original_name_1': 'col_new_name_1',
    'col_original_name_2': 'col_new_name_2',
    'col_original_name_3': 'col_new_name_3'
}, inplace=True)
import pandas as pd
# create the column together with the frame; assigning df.columns on an empty frame raises a length-mismatch error
df = pd.DataFrame({'value': [1, 2, 3, 4, 5]}, index=[0, 1, 2, 3, 4])
def reset_index(df):
    return df.reset_index(drop=True)
reset_index(df)
def convert_datetime(df, column_name):
    df[column_name] = pd.to_datetime(df[column_name]).dt.tz_localize('UTC').dt.tz_convert('Asia/Tokyo')
    return df
df = convert_datetime(df, 'column_name')
The function union takes two arguments a and b and returns the union of a and b.
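A minimal sketch, consistent with the union helper defined earlier in this list:
def union(a, b):
    # return the union of the two iterables as a list
    return list(set(a) | set(b))
union([1, 2, 3, 4], [3, 4, 5, 6])  # e.g. [1, 2, 3, 4, 5, 6]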
df = pd.DataFrame({'a':[0, 1, 2], 'b':[3, 4, 5]})
df.reset_index()
import csv
def read_csv(csv_filename):
    with open(csv_filename, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)
        for row in reader:
            print(row[0], row[5], row[6], row[7])
read_csv('artists.csv')
BASE_URL = 'https://www.credit-suisse.com/jp/ja/investment-services/research/regional-strategy/'
def get_table_data(url, xpath):
    import requests
    from bs4 import BeautifulSoup
    source = requests.get(url)
    soup = BeautifulSoup(source.content, 'lxml')
    # despite the parameter name, select_one() expects a CSS selector, not an XPath expression
    print(soup.select_one(xpath).get_text())
get_table_data(BASE_URL, '#rankingChart > tbody > tr:nth-of-type(22) > td')
def replace(df, column_name, original, replacement):
    df[column_name] = df[column_name].str.replace(original, replacement)
    return df
replace(df, column_name, original, replacement)
def extract_name(df):
    name_list = []
    for c in df.columns:
        name_list.append(c)
    return name_list
def get_survey_period(text):
    start = text.find('調査対象期間:') + len('調査対象期間:')
    end = text.find('〜', start)
    return text[start:end]
In [1]: S = '実査日: 2022年8月20日\u3000調査対象期間:2022年8月13日~8月19日'
In [2]: S.find('調査対象期間:')
Out[2]: 17
In [3]: S[17:].find('~')
Out[3]: 14
In [4]: S[17+14:]
Out[4]: '2022年8月13日'
# FOR LOOP over the two-letter country codes
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
for c in df_cc['2 characters']:
    # date of the chart to fetch
    d = '2022-11-17'
    # build the URL
    url = f'https://charts.spotify.com/charts/view/regional-{c}-weekly/{d}'
    print(url)
    # Selenium settings for loading the Spotify page
    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--start-fullscreen')
    options.add_argument('--disable-plugins')
    options.add_argument('--disable-extensions')
    # Initialize the Chrome webdriver and open the page
    driver = webdriver.Chrome(options=options)
    driver.get(url)
def get(data_structure, keys):
    for key in keys:
        data_structure = data_structure[key]
    return data_structure
get({'a': {'b': 'c'}}, ['a', 'b'])  # returns 'c'
# input: pd.DataFrame
# output: np.array
def adjacent_matrix(df):
    # adjacency matrix
    adjacent_matrix = np.zeros((len(df), len(df)))
    # fill the adjacency matrix
    for idx, row in df.iterrows():
        adjacent_matrix[idx][row.to_numpy()] = 1
    return adjacent_matrix
def NN_matrix(df):
    NN_matrix = np.zeros((len(df), len(df)))
    for i in range(0, len(df)):
        for j in range(0, len(df)):
            # euclidean distance
            NN_matrix[i, j] = np.sqrt(sum((df.iloc[i, :] - df.iloc[j, :]) ** 2))
    return NN_matrix
def get_date(text):
    text = text.replace('\n', '')
    # splitting on a label that occurs once yields two parts, so unpack two values
    before, rest = text.split('調査対象期間:', 1)
    target, after = rest.split('〜', 1)
    target = target.strip()
    after = after.strip().split(' ', 1)
    after = after[0]
    return target, after
get_date('調査対象期間: 2020-07-18 〜 2020-07-24')
import time
def sleep(index):
    if index % 901 == 0:
        time.sleep(1)
    return index
sleep(902)
import csv
def read_csv(file, cols):
    with open(file, "r") as f:
        reader = csv.reader(f)
        rows = [row for row in reader]
    return [str(row[cols]) for row in rows]
from urllib.request import urlopen
from urllib.parse import urlparse
from bs4 import BeautifulSoup
def get_title(url):
    html = urlopen(url)
    soup = BeautifulSoup(html, "html.parser")
    return soup.title.text
def get_links(url):
    a = urlparse(url)
    base = a.scheme + "://" + a.netloc
    html = urlopen(url)
    soup = BeautifulSoup(html, "html.parser")
    links = []
    for link in soup.find_all("a"):
        # some <a> tags have no href attribute
        href = link.attrs.get("href", "")
        if href.startswith("/"):
            url = base + href
            if url not in links:
                links.append(url)
        elif base in href:
            if href not in links:
                links.append(href)
    return links
import networkx as nx
def create_graph_from_data(df):
    G = nx.Graph()
    for i in range(len(df)):
        G.add_edge(df.iloc[i, 0], df.iloc[i, 1])
    return G
response.xpath(xpath).extract()
def remove_second(df):
    df['time'] = df['time'].dt.strftime('%Y-%m-%d %H:%M')
    return df
def add_edge(self, edge):
    src, dst, weight = edge
    src_node = self.node_list[src]
    dst_node = self.node_list[dst]
    src_node.add_edge(Edge(src_node, dst_node, weight))
    dst_node.add_edge(Edge(dst_node, src_node, weight))
import requests
from bs4 import BeautifulSoup
def scrape_ranking_table(url):
    response = requests.get(url)
    parser = BeautifulSoup(response.text, "html.parser")
    ranking_table = parser.find("table", id="rankingChart")
    last_row = ranking_table.find_all("tr")[-1]
    return last_row.text
scrape_ranking_table("https://www.melon.com/chart/index.htm")
browser.current_url
def replace_str(df, colname, before, after):
    df[colname] = df[colname].astype(str).str.replace(before, after)
    return df
replace_str(df, 'txt', '-', ' ')
import numpy as np
def create_adjacency_matrix(data_source):
    # np.int was removed from NumPy; use the builtin int as the dtype
    adjacency_matrix = np.zeros((160, 160), dtype=int)
    for index, row in data_source.iterrows():
        adjacency_matrix[row['row_index'], row['column_index']] = 1
    return adjacency_matrix
create_adjacency_matrix(data_source)
def get_date(text):
    # skip past the whole label '調査対象期間:' (including the colon)
    start = text.find("調査対象期間:") + len("調査対象期間:")
    end = text.find("〜", start)
    return text[start:end]
get_date("今日は調査対象期間:2018年1月〜2018年2月まで")
import spotipy
import pandas as pd
# note: a real client needs credentials, e.g. spotipy.Spotify(auth_manager=...)
spotify = spotipy.Spotify()
def artist_related_artists(artist, limit=50):
    # artist must be a Spotify artist ID, URI or URL
    result = spotify.artist_related_artists(artist)
    artists = []
    for item in result['artists']:
        artists.append([artist, item['name']])
    return artists
df = pd.DataFrame(artist_related_artists('Kendrick Lamar'), columns=['artist', 'related_artist'])
from lxml import html as lxml_html
def get_kimono_text(html):
    # BeautifulSoup has no XPath support, so parse with lxml to evaluate the XPath
    tree = lxml_html.fromstring(html)
    node = tree.xpath('//*[@id="left-contents"]/div[1]/div/div/div[1]/div/div[3]/p[2]/text()[3]')
    if node:
        return node[0].strip()
    else:
        return None
import re
def get_date(text):
    return re.search('調査対象期間:(.*)〜.*', text)[1]
get_date(text)
from bs4 import BeautifulSoup
def get_text(file_name):
    with open(file_name, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'lxml')
    return soup.text
import openpyxl
# open the input file
in_file = openpyxl.load_workbook('test.xlsx')
in_sheet = in_file.active
# create a new file
out_file = openpyxl.Workbook()
out_sheet = out_file.active
out_sheet.title = 'Japan'
# copy data (append the cell values; cells cannot be moved between worksheets)
for row in in_sheet.rows:
    if row[0].value == 'Japan':
        out_sheet.append([cell.value for cell in row])
# write the output file
out_file.save('out.xlsx')
def convert_timezone(df, timezone_col):
    df[timezone_col] = df[timezone_col].map(lambda x: x.tz_convert('Asia/Tokyo'))
    return df
if __name__ == "__main__":
    convert_timezone(df, 'date')
import csv
import random
import time
def get_column(fname, col_num):
    with open(fname) as f:
        reader = csv.reader(f)
        col_list = [row[col_num] for row in reader]
    return col_list
def random_sleep(min_sec, max_sec):
    random_sec = random.uniform(min_sec, max_sec)
    time.sleep(random_sec)
import requests
import json
def get_views(title):
    url = "https://ja.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': title,
        # prop=info does not return view counts; prop=pageviews (PageViewInfo extension)
        # returns recent daily view counts on Wikimedia wikis
        'prop': 'pageviews',
    }
    r = requests.get(url, params=params)
    data = json.loads(r.text)
    pages = data['query']['pages']
    for key in pages:
        print(pages[key]['title'])
        print(pages[key]['pageviews'])
get_views('特定秘密保護法')
import math
def prime(k):
    if k <= 1:
        return False
    for i in range(2, int(math.sqrt(k)) + 1):
        if k % i == 0:
            return False
    return True
def primes(n):
    for i in range(2, n + 1):
        if prime(i):
            print(i, end=',')
primes(100)
import twitter
import json
api = twitter.Api(consumer_key='XXXX',
                  consumer_secret='XXXX',
                  access_token_key='XXXX',
                  access_token_secret='XXXX')
user_ids = [27260086, 783214]
users = api.UsersLookup(user_id=user_ids)
for user in users:
    print(user.screen_name)
import osmnx as ox
import networkx as nx
def get_area(lat, lng):
    # construct a network around the point (pre-1.0 osmnx API with a distance argument)
    area = ox.graph_from_point((lat, lng), distance=3000, network_type='drive')
    # get the area of the network's convex hull in square metres
    edges = ox.graph_to_gdfs(area, nodes=False, edges=True)
    area_m2 = ox.project_gdf(edges).unary_union.convex_hull.area
    return area_m2
import pandas as pd
import numpy as np
def func(df, v):
    matrix = df.pivot_table(index=v, columns=v, values='value', aggfunc=np.size)
    matrix.fillna(0, inplace=True)
    matrix = matrix.apply(lambda x: x / sum(x))
    return matrix
data = pd.DataFrame({'A': ['a', 'a', 'b', 'b'], 'B': ['c', 'd', 'c', 'd'], 'value': [1, 1, 1, 1]})
func(data, 'A')
import pandas as pd
df = pd.read_csv("data/sales_data.csv")
df.head()
import requests
from bs4 import BeautifulSoup
def get_google_links(search_query):
    # use Google to search
    r = requests.get("http://www.google.co.jp/search?q=" + search_query)
    data = r.text
    # parse the html
    soup = BeautifulSoup(data, "lxml")
    all_divs = soup.find_all("a")
    links = []
    for div in all_divs:
        # some <a> tags have no href attribute
        href = div.get('href', '')
        if "http" in href:
            links.append(href)
    return links
import openpyxl
# return the date as a string
def get_date(value):
    if value:
        return value.strftime("%Y/%m/%d")
    else:
        return ""
# ws: worksheet
def grouping(ws):
    # column number to group by
    target_column = 3
    # dict holding the grouped row numbers, keyed by date string
    dates_data = {}
    # number of rows
    row_size = ws.max_row
    # for each row, record its row number under its date
    for row in range(2, row_size + 1):
        date = get_date(ws.cell(row=row, column=target_column).value)
        if date not in dates_data:
            dates_data[date] = []
        dates_data[date].append(row)
    return dates_data
def reset_index(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reset index of df and convert index to column.
    Args:
        df (pd.DataFrame): target dataframe.
    Returns:
        pd.DataFrame: dataframe with reset index.
    """
    return df.reset_index().rename(columns={'index': '__index__'})
from bs4 import BeautifulSoup
import requests
def get_data(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'html.parser')
    return soup.find('td', attrs={'class': 'td_count'}).text
get_data('https://chord-songs.com/chord/piano/artists/8650')
# import openpyxl
import openpyxl
# open the xlsx file
datasheet = openpyxl.load_workbook('datasheet.xlsx')
# create a new workbook
new_datasheet = openpyxl.Workbook()
# get the sheet names
sheet_names = datasheet.sheetnames
# create a sheet in the new workbook
new_datasheet.create_sheet(index=0, title='new_sheet')
# get the first sheet of the input workbook
sheet = datasheet[sheet_names[0]]
# get the new sheet
new_sheet = new_datasheet['new_sheet']
# get the numbers of rows and columns
rows = sheet.max_row
cols = sheet.max_column
# print the non-empty values in the first column
for i in range(1, sheet.max_row + 1):
    if sheet.cell(row=i, column=1).value is not None:
        print(sheet.cell(row=i, column=1).value)
import openpyxl
from datetime import datetime
wb = openpyxl.load_workbook('file.xlsx')
wb.worksheets[0].title = 'Sheet1'
ws1 = wb.worksheets[0]
ws_dict = {}
for row in ws1['A1':'C20']:
    date = row[2].value
    # parse the date only if the cell holds a string rather than a datetime
    if not isinstance(date, datetime):
        date = datetime.strptime(str(date), '%Y-%m-%d')
    week = date.strftime('%Y-%W')
    if week not in ws_dict:
        ws_dict[week] = wb.create_sheet(title=week)
    # append the whole row's values at once
    ws_dict[week].append([cell.value for cell in row])
wb.save('output.xlsx')
def rename(df, col_before, col_after):
    df[col_after] = df[col_before]
    return df
rename(df=df, col_before="name", col_after="new_name")
import re
def get_date_string(string):
    return re.search('調査対象期間:(.*)〜', string).group(1)
print(get_date_string('調査対象期間:2020/08/01〜2020/08/31'))
from wordcloud import WordCloud
def cloud(word_data, background_color="black"):
    wordcloud = WordCloud(background_color=background_color, font_path="./mplus-1m-bold.ttf").generate(word_data)
    return wordcloud
from bs4 import BeautifulSoup
import requests
def fetch_ranking():
    url = 'https://www.dmm.com/'
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'lxml')
    table = soup.find('table', id='rankingChart')
    last_row_tds = table.findAll('td')[-1]
    return last_row_tds.text
import struct
def get_float_from_hex(hex_str):
    # reinterpret the 32-bit pattern as an IEEE 754 float; '!I' (unsigned) also handles values with the sign bit set
    return struct.unpack('!f', struct.pack('!I', int(hex_str[2:], 16)))[0]
get_float_from_hex('0x42f60000')
import datetime
import openpyxl
def group_by_date(filepath):
    book = openpyxl.load_workbook(filepath)
    sheet = book.active
    # get the values of column C
    column_date = sheet['C']
    column_date_values = [cell.value for cell in column_date]
    # remove None
    column_date_values = [value for value in column_date_values if value is not None]
    # convert datetime.date to datetime.datetime
    column_date_values = [datetime.datetime(value.year, value.month, value.day) for value in column_date_values]
    # convert datetime.datetime to str
    column_date_values = [str(value.date()) for value in column_date_values]
    # delete duplicate values
    column_date_values = set(column_date_values)
    # group the rows by date
    grouped_data = {date_value: [] for date_value in column_date_values}
    for row in sheet.iter_rows(min_row=1):
        date = row[2].value
        if date is None:
            continue
        key = str(datetime.datetime(date.year, date.month, date.day).date())
        grouped_data[key].append([cell.value for cell in row])
    return grouped_data
def get_target_date(s):
    return s[s.index("調査対象期間:") + len("調査対象期間:"):s.index("~")]
get_target_date("実査日: 2022年8月20日 調査対象期間:2022年8月13日~8月19日")
import requests
from bs4 import BeautifulSoup
url = 'http://www.mhlw.go.jp/toukei/saikin/hw/kansen/kansen_h25_1.html'
html = requests.get(url).text
soup = BeautifulSoup(html, 'html.parser')
for tag in soup.find_all('tr'):
    if tag.has_attr('class'):
        print(tag.text)
import datetime
from openpyxl import load_workbook
# load the file
wb = load_workbook('sample.xlsx')
sheet = wb['Sheet1']
# collect the dates found in column C
dates = []
for cell in sheet['C']:
    if isinstance(cell.value, datetime.datetime):
        dates.append(cell.value)
# create one sheet per date
for date in dates:
    sheet_name = date.strftime('%Y-%m-%d')
    # create the sheet if it does not exist yet
    if sheet_name not in wb.sheetnames:
        wb.create_sheet(title=sheet_name)
# copy each row to the sheet for its date
for row in sheet.iter_rows(min_row=2):
    value = row[2].value
    if isinstance(value, datetime.datetime):
        wb[value.strftime('%Y-%m-%d')].append([cell.value for cell in row])
import openpyxl
import datetime
def split_sheet_by_date(excel_file_path, sheet_name):
    wb = openpyxl.load_workbook(excel_file_path)
    sheet = wb[sheet_name]
    date_max_col = sheet.max_column
    date_max_row = sheet.max_row
    # collect the distinct dates in column 3
    date_list = []
    for i in range(1, date_max_row + 1):
        date = sheet.cell(row=i, column=3).value
        date_list.append(date)
    date_set = set(date_list)
    # create one sheet per date and copy the matching rows into it
    for date in date_set:
        wb.create_sheet(str(date))
        new_sheet = wb[str(date)]
        for i in range(1, date_max_row + 1):
            if sheet.cell(row=i, column=3).value == date:
                new_sheet.append([sheet.cell(row=i, column=j).value for j in range(1, date_max_col + 1)])
l = [1, 2, 3]
def head(l):
    return l[0]
head(l)
import re
def fetch_date(text):
    return re.search(r'調査対象期間:(.+?)〜', text).group(1)
fetch_date('調査(掲載ptは小数点第1で四捨五入)実査日:2022年12月3日 調査対象期間:2022年11月26日〜12月2日')