In the last few years there have been a large number of redundancies in the tech sector around the world, and they keep on coming.
In this example we will look at a dataset of layoff data and use Python and various libraries to analyze it and display some information in graphs.
Code
As usual we will import the required libraries and analyse the data
import warnings

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
# Load the layoffs dataset from the CSV file (path is relative to the
# working directory).
df = pd.read_csv('layoffs.csv')

# Preview the first rows, then the transposed summary statistics.
preview = df.head()
summary = df.describe().transpose()
print(preview)
print(summary)

# Report how many missing values each column contains.
missing_per_column = df.isnull().sum()
print(missing_per_column)
This shows a lot of empty cells:
company 0
location 0
industry 2
total_laid_off 799
percentage_laid_off 851
date 2
stage 6
country 0
funds_raised 248
Let's fill these with values and check again.
#fill empty values
# Use a placeholder that matches each column's kind of data: 0 for the
# numeric measures, a text label for the descriptive columns. Filling the
# descriptive columns with the integer 0 would mix ints into label columns
# and create a meaningless "0" group in later groupby calls.
# NOTE(review): assumes 'stage' and 'industry' hold text labels - confirm
# against the CSV.
# 'date' is deliberately not filled here, so it can still report missing
# values in the check below.
fill_values = {
    'total_laid_off': 0,
    'percentage_laid_off': 0,
    'funds_raised': 0,
    'stage': 'Unknown',
    'industry': 'Unknown',
}
df = df.fillna(fill_values)
#show the data
print(df.head())
print(df.describe().T)
#any empty data
print(df.isna().sum())
company 0
location 0
industry 0
total_laid_off 0
percentage_laid_off 0
date 2
stage 0
country 0
funds_raised 0
#top 10 companies by layoff numbers
# Aggregate once and reuse the result for both the printout and the chart.
# reset_index() turns the company names into a regular column so the chart
# can reference both columns by name.
top_companies = (
    df.groupby('company')['total_laid_off']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
# A bare expression is only displayed in a notebook; print it so the table
# also shows up when this runs as a script.
print(top_companies)

# Requires `import plotly.express as px` at the top of the file.
# x/y are named explicitly so that the `labels` mapping, which is keyed by
# column name, actually renames the axes.
fig = px.bar(
    top_companies,
    x='company',
    y='total_laid_off',
    text_auto=True,
    title='Top 10 companies that laid off from 2020 to 2022',
    labels={'company': 'Company', 'total_laid_off': 'Layoffs'},
)
fig.show()
#top sectors
# Keep the aggregated Series so the bar labels come from the data itself
# rather than a hand-typed list that can drift out of sync with the values.
top_sectors = (
    df.groupby('industry')['total_laid_off']
    .sum()
    .sort_values(ascending=False)
    .head()
)
sectors = top_sectors.to_numpy()
# Cast to str so matplotlib receives uniform categorical labels even if the
# index contains a non-string placeholder for missing industries.
arr = top_sectors.index.astype(str).to_numpy()
plt.figure(figsize=(8, 6))
plt.bar(arr, sectors)
plt.xlabel('Sectors', fontdict={'size': 12, 'color': 'orange'})
plt.ylabel('count', fontdict={'size': 12, 'color': 'orange'})
plt.title('Top Sectors by layoffs', fontdict={'size': 18, 'color': 'orange'})
plt.show()
#top countries
# Take the labels from the aggregated index so every bar is guaranteed to be
# named after the country its total belongs to.
top_countries = (
    df.groupby('country')['total_laid_off']
    .sum()
    .sort_values(ascending=False)
    .head()
)
countries = top_countries.to_numpy()
arr1 = top_countries.index.astype(str).to_numpy()
plt.figure(figsize=(7, 7))
# x and y are passed as standalone vectors, so no `data=` frame is needed.
# Numeric x with string y yields horizontal bars.
ax = sns.barplot(x=countries, y=arr1)
plt.title('Most Affected Countries')
plt.show()