CONCATENATE DATAFRAMES
GRADE XII
#The concat() function joins two or more DataFrames into a single DataFrame.
#It is typically used to combine DataFrames that have a similar structure.
import pandas as pd
'''
def _labelled_frame(start, stop):
    """Build a frame with columns A-D whose cells read '<column><row label>'.

    Rows are labelled start, start + 1, ..., stop - 1.
    """
    labels = list(range(start, stop))
    cells = {col: [col + str(n) for n in labels] for col in "ABCD"}
    return pd.DataFrame(cells, index=labels)


# Three frames with identical columns and consecutive, non-overlapping labels.
df1 = _labelled_frame(0, 4)
df2 = _labelled_frame(4, 8)
df3 = _labelled_frame(8, 12)

# By default concat() stacks the frames on top of one another (axis=0).
frames = [df1, df2, df3]
result = pd.concat(frames)
print(result)

# axis=1 places the frames side by side; rows are aligned on the index, so
# labels present in only one frame are padded with NaN in the other's columns.
result1 = pd.concat([df1, df3], axis=1)
print(result1)
print("*" * 40)

# axis=0 spelled out explicitly: row-wise concatenation.
result2 = pd.concat([df1, df3], axis=0)
print(result2)
print("*" * 40)

# ignore_index=True drops the original labels and renumbers the rows from 0.
result3 = pd.concat([df1, df3], ignore_index=True)
print(result3)
'''
'''
# Passing ignore_index=True to concat() discards the original row labels of
# the input DataFrames and numbers the combined rows afresh.
dt_sc = dict(
    English=[74, 79, 48, 53, 68, 44, 65, 67],
    Physics=[76, 78, 80, 76, 73, 55, 49, 60],
    Chemistry=[57, 74, 55, 89, 70, 50, 60, 80],
)
xii_1 = pd.DataFrame(dt_sc)

dt_co = dict(
    English=[66, 65, 87, 56, 86, 44, 56, 76],
    Physics=[67, 87, 80, 67, 77, 55, 45, 80],
    Chemistry=[75, 47, 55, 98, 70, 50, 60, 80],
)
xii_2 = pd.DataFrame(dt_co)

# Plain concatenation keeps each frame's own labels, so 0..7 appears twice.
# Print this intermediate result to compare it with the one below.
xii = pd.concat([xii_1, xii_2])

# With ignore_index=True the sixteen rows are relabelled 0..15 instead.
# Observe the difference in the printed index.
xii = pd.concat([xii_1, xii_2], ignore_index=True)
print(xii)
'''
# merge() combines two DataFrames on the values they have in common, much like
# joining a parent table to a child table in an RDBMS.
# The column(s) to match on are named with the `on` parameter; when `on` is
# omitted, pandas joins on every column name the two frames share.
# At least one column must therefore hold common data in both frames.

# Parent table: one row per player, identified by a unique P_ID.
# (P_ID 3 was previously mistyped as a second 5, which paired 'Virendra' with
# the runs recorded for P_ID 5 and dropped the P_ID 3 row from the result.)
p1 = {
    'P_ID': [1, 2, 3, 4, 5],
    'First_Name': ['Sachin', 'Saurav', 'Virendra', 'Mahendra Sinh', 'Gautam'],
    'Last_Name': ['Tendulker', 'Ganguly', 'Sehvag', 'Dhoni', 'Gambhir'],
}
d1 = pd.DataFrame(p1)
print(d1)
print("*"*40)

# Child table: runs scored, keyed by the same P_ID values.
p2 = {
    'P_ID': [1, 2, 3, 4, 5],
    'Runs': [18987, 12120, 11345, 10345, 12789],
}
d2 = pd.DataFrame(p2)
print(d2)
print("*"*40)

# Name the join column explicitly so the result does not depend on which
# column names the two frames happen to share.
players = pd.merge(d1, d2, on='P_ID')
print(players)
'''
# MERGING OF DATAFRAMES
# Both frames carry a "key" column holding the same four values.
left = pd.DataFrame(
    dict(
        key=["K0", "K1", "K2", "K3"],
        A=["A0", "A1", "A2", "A3"],
        B=["B0", "B1", "B2", "B3"],
    )
)
right = pd.DataFrame(
    dict(
        key=["K0", "K1", "K2", "K3"],
        C=["C0", "C1", "C2", "C3"],
        D=["D0", "D1", "D2", "D3"],
    )
)
# With no `on` argument, merge joins on the columns common to both frames.
# Here that is just "key", so this gives the same result as on="key".
result = left.merge(right)
print(result)
# Employee data split across two tables that share the columns 'key' and 'key1'.
data1 = dict(
    key=['K0', 'K1', 'K2', 'K3'],
    key1=['K0', 'K1', 'K0', 'K1'],
    Name=['Jai', 'Princi', 'Gaurav', 'Anuj'],
    Age=[27, 24, 22, 32],
)
data2 = dict(
    key=['K0', 'K1', 'K2', 'K3'],
    key1=['K0', 'K0', 'K0', 'K0'],
    Address=['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    Qualification=['Btech', 'B.A', 'Bcom', 'B.hons'],
)

# Turn both dictionaries into DataFrames and show them.
df = pd.DataFrame(data1)
df1 = pd.DataFrame(data2)
print("*" * 40)
print(df, "\n\n", df1)
print("*" * 40)

# Merging on several keys: a row is matched only when BOTH 'key' and 'key1'
# agree. The default join is an inner join, so unmatched rows are dropped.
join_keys = ['key', 'key1']
res1 = df.merge(df1, on=join_keys)
print(res1)

# how='left' would instead keep every row of df; how='right' (below) keeps
# every row of df1 and fills the columns coming from df with NaN where no
# match exists.
print("*" * 40)
res = df.merge(df1, how='right', on=join_keys)
print(res)
# To join DataFrames we use the .join() method. It combines the columns of
# two potentially differently-indexed DataFrames into a single result
# DataFrame, matching rows on their index labels.

# Employee names and ages, labelled K0..K3.
data1 = dict(
    Name=['Jai', 'Princi', 'Gaurav', 'Anuj'],
    Age=[27, 24, 22, 32],
)
# Employee addresses and qualifications, labelled K0, K2, K3 and K4.
data2 = dict(
    Address=['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'],
    Qualification=['MCA', 'Phd', 'Bcom', 'B.hons'],
)

# Convert the dictionaries into DataFrames with explicit row labels.
df = pd.DataFrame(data1, index=['K0', 'K1', 'K2', 'K3'])
df1 = pd.DataFrame(data2, index=['K0', 'K2', 'K3', 'K4'])

# Joining keeps the calling frame's index: K1 has no partner in df1 and gets
# NaN, while K4 (present only in df1) does not appear in the result.
res = df.join(df1)
print(res)
print(df, "\n\n", df1)
Comments
Post a Comment