xxxxxxxxxx
# Draw a random sample of 10% of the rows of your dataframe (frac=0.1).
# NOTE: replace=True samples *with replacement*, so the same row may appear
# more than once in the result; random_state=1 fixes the seed so the draw is
# reproducible from run to run.
df.sample(frac=0.1, replace=True, random_state=1)
xxxxxxxxxx
>>> df['num_legs'].sample(n=3, random_state=1)
fish 0
spider 8
falcon 2
Name: num_legs, dtype: int64
xxxxxxxxxx
>>> import pandas as pd
>>> sentence = 'The quick brown fox jumps over a lazy dog.'
>>> words = sentence.split(' ')
>>> df1 = pd.DataFrame({'key': range(len(words)),
...                     'column1_Words': words,
...                     'column2_Length': [len(x) for x in words]
...                     })
>>> df1
key column1_Words column2_Length
0 0 The 3
1 1 quick 5
2 2 brown 5
3 3 fox 3
4 4 jumps 5
5 5 over 4
6 6 a 1
7 7 lazy 4
8 8 dog. 4
>>>
xxxxxxxxxx
>>> df.sample(frac=0.5, replace=True, random_state=1)
num_legs num_wings num_specimen_seen
dog 4 0 2
fish 0 0 8
xxxxxxxxxx
import pandas
import random
# Load a random sample of `s` records from a large CSV file without reading
# the whole file into memory: pick the line numbers to drop up front and let
# read_csv skip them while parsing.
n = 1000000  # number of data records in the file (header line not counted)
s = 10000  # desired sample size
filename = "data.txt"
# Line 0 is the header and must never be skipped, otherwise a data row would
# be promoted to column names. Draw the rows to drop from the data lines
# 1..n only, so every record (including the last) is equally likely to be kept.
skip = sorted(random.sample(range(1, n + 1), n - s))
df = pandas.read_csv(filename, skiprows=skip)
xxxxxxxxxx
>>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
...                    'num_wings': [2, 0, 0, 0],
...                    'num_specimen_seen': [10, 2, 1, 8]},
...                   index=['falcon', 'dog', 'spider', 'fish'])
>>> df
num_legs num_wings num_specimen_seen
falcon 2 2 10
dog 4 0 2
spider 8 0 1
fish 0 0 8
xxxxxxxxxx
import pandas
import random
# Load a random sample of `s` records from a CSV file whose length is not
# known in advance: count the lines first, then tell read_csv which line
# numbers to skip while parsing.
filename = "data.txt"
# Count lines inside a context manager so the file handle is closed promptly
# instead of being left for the garbage collector.
with open(filename) as fh:
    n = sum(1 for _ in fh) - 1  # number of records in file (excludes header)
s = 10000  # desired sample size
# Rows to drop are drawn from 1..n, so the 0-indexed header is never skipped.
# max(..., 0) keeps every record when the file holds fewer than `s` records,
# rather than letting random.sample raise ValueError on a negative size.
skip = sorted(random.sample(range(1, n + 1), max(n - s, 0)))
df = pandas.read_csv(filename, skiprows=skip)