import pandas as pd # for the data.
import numpy as np # for a NaN type
import matplotlib.pyplot as plt # For plotting, and some customization of plots.
import seaborn as sns # For pretty plots.
# Fix the size of the graphs: apply an 11x8 figure size through seaborn's
# rc settings so every plot in the notebook picks it up.
sns.set(rc={"figure.figsize": (11, 8)})
This is Part Two of a series on scraping, cleaning and analyzing the Jobs for The Cycle: Frontier. If you haven’t read Part One then I’d suggest starting there. We’re picking up now with cleaning the tasks you have to complete for each job. So, let’s get started! We’ll pull in our normal libraries for working on projects like this.
We’re actually going to be using the same data table as before from the Official Wiki and the Jobs Page. Like before, we’re going to use the same `read_html()` call targeting the `Name` class.
# The game was taken down, so the live wiki URL is kept only for reference
# and an archived snapshot of the Jobs page is read instead.
# url = "https://thecyclefrontier.wiki/wiki/Jobs"
url = 'https://archive.ph/HZfyf'

# read_html returns a list of DataFrames; match="Name" keeps only the tables
# containing that text. The converters force the listed columns to the given
# types while parsing.
# NOTE(review): the rendered table header is "Unlock Level" (shown as floats),
# so the "Unlocked" converter key presumably never applies - confirm the
# intended column name.
site = pd.read_html(url, match="Name",
    converters = {
        "Name": str,
        "Description": str,
        "Unlocked": int,
        "Tasks": str,
        "Rewards": str})
And here is the data in the weird rows and columns like before:
# Weird Problem: Data Looks funny, can still use this.
# The first matched table is the jobs table; preview its first 8 rows.
site[0].head(8)
Name | Description | Unlock Level | Difficulty | Tasks | Rewards | |
---|---|---|---|---|---|---|
0 | New Mining Tools | We are producing new Mining Tools for new Pros... | 4.0 | Easy | Collect: 2 Hydraulic Piston 10 Hardened Metals | 3800 K-Marks 1 Korolev Scrip 15 Korolev R... |
1 | 3800 | K-Marks | NaN | NaN | NaN | NaN |
2 | 1 | Korolev Scrip | NaN | NaN | NaN | NaN |
3 | 15 | Korolev Reputation | NaN | NaN | NaN | NaN |
4 | Explosive Excavation | One of our mines collapsed with valuable equip... | 7.0 | Medium | Collect: 4 Derelict Explosives | 11000 K-Marks 8 Korolev Scrip 52 Korolev ... |
5 | 11000 | K-Marks | NaN | NaN | NaN | NaN |
6 | 8 | Korolev Scrip | NaN | NaN | NaN | NaN |
7 | 52 | Korolev Reputation | NaN | NaN | NaN | NaN |
We’re going to make a copy of the specific columns we want to avoid any strange insert/update issues.
# Work on an explicit copy of just the columns needed for the task analysis so
# later assignments do not touch the scraped table.
tasksSubset = site[0][["Name", "Description", "Tasks"]].copy()
tasksSubset
Name | Description | Tasks | |
---|---|---|---|
0 | New Mining Tools | We are producing new Mining Tools for new Pros... | Collect: 2 Hydraulic Piston 10 Hardened Metals |
1 | 3800 | K-Marks | NaN |
2 | 1 | Korolev Scrip | NaN |
3 | 15 | Korolev Reputation | NaN |
4 | Explosive Excavation | One of our mines collapsed with valuable equip... | Collect: 4 Derelict Explosives |
... | ... | ... | ... |
183 | 470 | Korolev Reputation | NaN |
184 | No Expiry Date | There you are, finally! There's been an accide... | Collect: 10 Old Medicine |
185 | 6300 | K-Marks | NaN |
186 | 9 | Korolev Scrip | NaN |
187 | 62 | Korolev Reputation | NaN |
188 rows × 3 columns
# Get rid of the middle stuff we don't need: the reward rows interleaved
# between jobs have NaN in Tasks, so keep only rows where Tasks is present.
tasksSubset = tasksSubset[~tasksSubset.Tasks.isna()]
tasksSubset.head(15)
Name | Description | Tasks | |
---|---|---|---|
0 | New Mining Tools | We are producing new Mining Tools for new Pros... | Collect: 2 Hydraulic Piston 10 Hardened Metals |
4 | Explosive Excavation | One of our mines collapsed with valuable equip... | Collect: 4 Derelict Explosives |
8 | Mining Bot | Our engineers have designed an autonomous mini... | Collect: 2 Zero Systems CPU 3 Ball Bearings |
12 | None of your Business | Prospector. We need Toxic Glands. Don't ask qu... | Collect: 2 Toxic Glands |
16 | Insufficient Processing Power | Prospector! The Zero Systems CPU you brought u... | Collect: 1 Master Unit CPU |
20 | Excavator Improvements | The suspension on our mining excavators need i... | Collect: 2 Co-TEC MultiTool 3 Ball Bearings 3 ... |
24 | A new type of Alloy | Our scientists are confident they can create a... | Collect: 4 Hardened Bone Plates 12 Compound Sh... |
28 | Automated Security | We will have to build new turrets to help prot... | Collect: 5 Zero Systems CPU 16 Hardened Metals |
32 | Energy Crisis | Veltecite supplies are low, but we need energy... | Collect: 4 Miniature Reactor |
36 | Classified I | Prospector! We need Derelict Explosives, Maste... | Collect: 10 Derelict Explosives 2 Master Unit ... |
40 | Clear Veltecite | The Veltecite you brought us the other day is ... | Collect: 2 Clear Veltecite |
44 | Time to Focus | One of our miners searching the Jungle for Foc... | Kill 6 Creatures at Jungle Collect: 4 Focus Cr... |
48 | Pure Veltecite | The Clear Veltecite was an improvement, we gai... | Collect: 2 Pure Veltecite |
52 | Titans of Industry | Scouts have found Titan Ore deposits on Fortun... | Collect: 2 Titan Ore 6 Altered Nickel |
56 | Crystal Frenzy | We're working on a new type of laser for our l... | Collect: 2 Clear Veltecite 8 Focus Crystal |
As I discussed in the previous post, there are three kinds of jobs: Collect, Deposit and Kill. The Collect and Deposit jobs are fine since they involve something easily quantifiable: loot. However, the Kill quests present a very real problem since there is no simple way to quantify them. For killing creatures, maybe we could take the sum of their expected drops and their rate of drop and include that as part of the rewards? Of course, the player may simply choose not to pick any of that up.
Another problem is killing players; how much is a player kill actually worth? And, the difficulty of killing players is connected to the skill level of each player - which we also cannot know. Therefore, I’ve elected to remove the Kill Jobs from the analysis.
# Anything with "Kill" in its task text is removed until there is a better way
# to quantify it. This also drops mixed jobs that combine a Kill step with a
# Collect step (e.g. row 44, "Time to Focus").
tasksSubset = tasksSubset[~tasksSubset.Tasks.str.contains("Kill")]
tasksSubset.head(15)
Name | Description | Tasks | |
---|---|---|---|
0 | New Mining Tools | We are producing new Mining Tools for new Pros... | Collect: 2 Hydraulic Piston 10 Hardened Metals |
4 | Explosive Excavation | One of our mines collapsed with valuable equip... | Collect: 4 Derelict Explosives |
8 | Mining Bot | Our engineers have designed an autonomous mini... | Collect: 2 Zero Systems CPU 3 Ball Bearings |
12 | None of your Business | Prospector. We need Toxic Glands. Don't ask qu... | Collect: 2 Toxic Glands |
16 | Insufficient Processing Power | Prospector! The Zero Systems CPU you brought u... | Collect: 1 Master Unit CPU |
20 | Excavator Improvements | The suspension on our mining excavators need i... | Collect: 2 Co-TEC MultiTool 3 Ball Bearings 3 ... |
24 | A new type of Alloy | Our scientists are confident they can create a... | Collect: 4 Hardened Bone Plates 12 Compound Sh... |
28 | Automated Security | We will have to build new turrets to help prot... | Collect: 5 Zero Systems CPU 16 Hardened Metals |
32 | Energy Crisis | Veltecite supplies are low, but we need energy... | Collect: 4 Miniature Reactor |
36 | Classified I | Prospector! We need Derelict Explosives, Maste... | Collect: 10 Derelict Explosives 2 Master Unit ... |
40 | Clear Veltecite | The Veltecite you brought us the other day is ... | Collect: 2 Clear Veltecite |
48 | Pure Veltecite | The Clear Veltecite was an improvement, we gai... | Collect: 2 Pure Veltecite |
52 | Titans of Industry | Scouts have found Titan Ore deposits on Fortun... | Collect: 2 Titan Ore 6 Altered Nickel |
56 | Crystal Frenzy | We're working on a new type of laser for our l... | Collect: 2 Clear Veltecite 8 Focus Crystal |
60 | Geologist | You got time for a job, Prospector? The sample... | Collect: 2 Pure Veltecite 1 Pure Focus Crystal |
So, each Job can request you collect multiple loots as well as more than one type of loot. Here is a good example of what I mean by this:
# Look up one job by its original row label to show a multi-loot task string.
tasksSubset.loc[28].Tasks
'Collect: 5 Zero Systems CPU 16 Hardened Metals'
As you can see, this task requires you to collect both Zero System CPUs as well as Hardened Metals - and a good number of them. What we want is to not only extract each type of loot independently of each other but also to keep the count paired with the loot type.
I would like to take this moment to thank the developers of Pandas. I spent a bit of time thinking about how I would solve this and they had already included a solution to this problem: `extractall()`. What this does is allow you to pass a Regular Expression and it will then pull out anything in the string which matches. It even puts the matches into their own separate rows! Again, thank you!
For Regular Expressions, this is something you will have to learn on your own. I used a website to test and build mine from an example row; there is plenty of documentation about how to use these.
# One capture group: a run of digits, one whitespace character, a word, one
# whitespace character, and a second word ("<count> <word> <word>").
# NOTE(review): because exactly two words are captured, three-word loot names
# are truncated in the output (e.g. "2 Zero Systems CPU" is extracted as
# "2 Zero Systems"), and hyphenated names such as "Co-TEC MultiTool" do not
# appear among the matches for row 20. Consider widening the pattern before
# joining against loot tables.
regex = r"(\d+\s[\w]+\s[\w]+)"

# extractall returns one row per match, indexed by (original row label, match
# number), which keeps the link back to the job each loot entry came from.
tmp = tasksSubset.Tasks.str.extractall(regex)
tmp.head(15)
0 | ||
---|---|---|
match | ||
0 | 0 | 2 Hydraulic Piston |
1 | 10 Hardened Metals | |
4 | 0 | 4 Derelict Explosives |
8 | 0 | 2 Zero Systems |
1 | 3 Ball Bearings | |
12 | 0 | 2 Toxic Glands |
16 | 0 | 1 Master Unit |
20 | 0 | 3 Ball Bearings |
1 | 3 Hydraulic Piston | |
24 | 0 | 4 Hardened Bone |
1 | 12 Compound Sheets | |
28 | 0 | 5 Zero Systems |
1 | 16 Hardened Metals | |
32 | 0 | 4 Miniature Reactor |
36 | 0 | 10 Derelict Explosives |
Perfect! Now we have all the different loot and we got to keep the row’s index for later when we’ll attach the job name and description. Before that though, we’ll need to do some work to separate the count and the loot type into their own columns. While I’m sure there is a better way to do this, I could not think of one so we’re going to write a function to break the loot and count apart and then return them.
There is a solid function for this already called `.split()` and we’re going to use it to split on spaces, but since some of the loot names are multiple words we need to force it to only split once.
# Take the second extracted match (positional row 1 of captured column 0) as a
# worked example.
example = tmp.reset_index()[0][1]
# maxsplit=1 splits only at the first space, so a multi-word loot name stays
# in one piece.
parts = example.split(' ', maxsplit=1)
number, loot = int(parts[0]), parts[1]
number, loot
(10, 'Hardened Metals')
Now we’ll create the function. What we can do here though is return either the `count` value or the `loot` value depending on an index passed: 0 for `count` and 1 for `loot`.
def breakLoot(taskString, index=0):
    """Split a "<count> <loot name>" string and return one of its two parts.

    The string is split at the first space only, so multi-word loot names
    stay intact.

    Args:
        taskString: Text such as "10 Hardened Metals".
        index: 0 to get the count as an int (the default), 1 to get the
            loot name as a str.

    Returns:
        The integer count when index is 0, the loot name when index is 1,
        and None for any other index.

    Raises:
        ValueError: If index is 0 and the leading token is not an integer.
        IndexError: If index is 1 and the string contains no space.
    """
    parts = taskString.split(' ', maxsplit=1)
    if index == 0:
        return int(parts[0])
    if index == 1:
        return parts[1]
    # Any other index is unsupported; this shouldn't be called.
    return None
Now we just run two `.apply()` calls to get the values out:
# Apply breakLoot to every extracted "<count> <loot>" string: once with the
# default index (the count) and once with index=1 (the loot name). .values
# strips the index so the arrays can be assigned back positionally.
count = tmp.reset_index()[0].apply(breakLoot).values
aLoot = tmp.reset_index()[0].apply(breakLoot, index=1).values
And, then assign them to our brand new columns for them.
# Attach the parsed values as new columns; both arrays are in the same row
# order as tmp.
tmp = tmp.assign(
    count = count,
    loot = aLoot
)
tmp.head(15)
0 | count | loot | ||
---|---|---|---|---|
match | ||||
0 | 0 | 2 Hydraulic Piston | 2 | Hydraulic Piston |
1 | 10 Hardened Metals | 10 | Hardened Metals | |
4 | 0 | 4 Derelict Explosives | 4 | Derelict Explosives |
8 | 0 | 2 Zero Systems | 2 | Zero Systems |
1 | 3 Ball Bearings | 3 | Ball Bearings | |
12 | 0 | 2 Toxic Glands | 2 | Toxic Glands |
16 | 0 | 1 Master Unit | 1 | Master Unit |
20 | 0 | 3 Ball Bearings | 3 | Ball Bearings |
1 | 3 Hydraulic Piston | 3 | Hydraulic Piston | |
24 | 0 | 4 Hardened Bone | 4 | Hardened Bone |
1 | 12 Compound Sheets | 12 | Compound Sheets | |
28 | 0 | 5 Zero Systems | 5 | Zero Systems |
1 | 16 Hardened Metals | 16 | Hardened Metals | |
32 | 0 | 4 Miniature Reactor | 4 | Miniature Reactor |
36 | 0 | 10 Derelict Explosives | 10 | Derelict Explosives |
And, there we go! We have our columns how we want them. Now we just need to work on getting the `Name` and `Description` values attached to our new data frame. One way to do this would be to do some sort of merge or join based on the index we’ve saved. Or, we can do something even easier!
If we look at the rows when we do an index reset:
# Flatten the (row label, match) MultiIndex into ordinary columns to inspect it.
tmp.reset_index().head(8)
level_0 | match | 0 | count | loot | |
---|---|---|---|---|---|
0 | 0 | 0 | 2 Hydraulic Piston | 2 | Hydraulic Piston |
1 | 0 | 1 | 10 Hardened Metals | 10 | Hardened Metals |
2 | 4 | 0 | 4 Derelict Explosives | 4 | Derelict Explosives |
3 | 8 | 0 | 2 Zero Systems | 2 | Zero Systems |
4 | 8 | 1 | 3 Ball Bearings | 3 | Ball Bearings |
5 | 12 | 0 | 2 Toxic Glands | 2 | Toxic Glands |
6 | 16 | 0 | 1 Master Unit | 1 | Master Unit |
7 | 20 | 0 | 3 Ball Bearings | 3 | Ball Bearings |
… we can see that the column `level_0` actually contains duplicate indexes from our matches. So, the values `Hydraulic Piston` and `Hardened Metals` are both associated with the Task at index 0. As long as we can use that index to get duplicate values then we can just pull all the `Name` and `Description` values in their matching order.
# Label-based lookup with repeated labels returns repeated rows: take level_0
# for the first six matches (.loc[:5] is inclusive) and use it to pull Name
# and Description from the jobs table.
tasksSubset.loc[tmp.reset_index().loc[:5, "level_0"], ['Name', 'Description']]
Name | Description | |
---|---|---|
0 | New Mining Tools | We are producing new Mining Tools for new Pros... |
0 | New Mining Tools | We are producing new Mining Tools for new Pros... |
4 | Explosive Excavation | One of our mines collapsed with valuable equip... |
8 | Mining Bot | Our engineers have designed an autonomous mini... |
8 | Mining Bot | Our engineers have designed an autonomous mini... |
12 | None of your Business | Prospector. We need Toxic Glands. Don't ask qu... |
… which is exactly what we get! Duplicates! Time to slice it out and then assign the values.
# Pull Name/Description once per extracted match, in match order, by indexing
# the jobs table with the (repeating) original row labels.
nameDescriptSlice = tasksSubset.loc[tmp.reset_index()["level_0"], ['Name', 'Description']]

# .values discards the slice's index so the columns are assigned positionally
# against tmp's MultiIndex rows.
tmp = tmp.assign(
    name = nameDescriptSlice.Name.values,
    description = nameDescriptSlice.Description.values
)
tmp.head(15)
0 | count | loot | name | description | ||
---|---|---|---|---|---|---|
match | ||||||
0 | 0 | 2 Hydraulic Piston | 2 | Hydraulic Piston | New Mining Tools | We are producing new Mining Tools for new Pros... |
1 | 10 Hardened Metals | 10 | Hardened Metals | New Mining Tools | We are producing new Mining Tools for new Pros... | |
4 | 0 | 4 Derelict Explosives | 4 | Derelict Explosives | Explosive Excavation | One of our mines collapsed with valuable equip... |
8 | 0 | 2 Zero Systems | 2 | Zero Systems | Mining Bot | Our engineers have designed an autonomous mini... |
1 | 3 Ball Bearings | 3 | Ball Bearings | Mining Bot | Our engineers have designed an autonomous mini... | |
12 | 0 | 2 Toxic Glands | 2 | Toxic Glands | None of your Business | Prospector. We need Toxic Glands. Don't ask qu... |
16 | 0 | 1 Master Unit | 1 | Master Unit | Insufficient Processing Power | Prospector! The Zero Systems CPU you brought u... |
20 | 0 | 3 Ball Bearings | 3 | Ball Bearings | Excavator Improvements | The suspension on our mining excavators need i... |
1 | 3 Hydraulic Piston | 3 | Hydraulic Piston | Excavator Improvements | The suspension on our mining excavators need i... | |
24 | 0 | 4 Hardened Bone | 4 | Hardened Bone | A new type of Alloy | Our scientists are confident they can create a... |
1 | 12 Compound Sheets | 12 | Compound Sheets | A new type of Alloy | Our scientists are confident they can create a... | |
28 | 0 | 5 Zero Systems | 5 | Zero Systems | Automated Security | We will have to build new turrets to help prot... |
1 | 16 Hardened Metals | 16 | Hardened Metals | Automated Security | We will have to build new turrets to help prot... | |
32 | 0 | 4 Miniature Reactor | 4 | Miniature Reactor | Energy Crisis | Veltecite supplies are low, but we need energy... |
36 | 0 | 10 Derelict Explosives | 10 | Derelict Explosives | Classified I | Prospector! We need Derelict Explosives, Maste... |
Finally, we’ll drop all those extra columns we don’t need.
# Flatten the index, then drop the bookkeeping columns: the original row label
# (level_0), the match number, and the raw extracted string (column 0).
tasks = tmp.reset_index().drop([
    'level_0',
    'match',
    0
], axis=1)

# Reorder the remaining columns for readability.
tasks = tasks[['name', 'count', 'loot', 'description']]
tasks.head(15)
name | count | loot | description | |
---|---|---|---|---|
0 | New Mining Tools | 2 | Hydraulic Piston | We are producing new Mining Tools for new Pros... |
1 | New Mining Tools | 10 | Hardened Metals | We are producing new Mining Tools for new Pros... |
2 | Explosive Excavation | 4 | Derelict Explosives | One of our mines collapsed with valuable equip... |
3 | Mining Bot | 2 | Zero Systems | Our engineers have designed an autonomous mini... |
4 | Mining Bot | 3 | Ball Bearings | Our engineers have designed an autonomous mini... |
5 | None of your Business | 2 | Toxic Glands | Prospector. We need Toxic Glands. Don't ask qu... |
6 | Insufficient Processing Power | 1 | Master Unit | Prospector! The Zero Systems CPU you brought u... |
7 | Excavator Improvements | 3 | Ball Bearings | The suspension on our mining excavators need i... |
8 | Excavator Improvements | 3 | Hydraulic Piston | The suspension on our mining excavators need i... |
9 | A new type of Alloy | 4 | Hardened Bone | Our scientists are confident they can create a... |
10 | A new type of Alloy | 12 | Compound Sheets | Our scientists are confident they can create a... |
11 | Automated Security | 5 | Zero Systems | We will have to build new turrets to help prot... |
12 | Automated Security | 16 | Hardened Metals | We will have to build new turrets to help prot... |
13 | Energy Crisis | 4 | Miniature Reactor | Veltecite supplies are low, but we need energy... |
14 | Classified I | 10 | Derelict Explosives | Prospector! We need Derelict Explosives, Maste... |
Conclusions
And, there we have it! Another piece of the puzzle solved. Next we’re going to work to combine all the faction rewards, the job requirements and the loot tables together to finally calculate which jobs you should definitely avoid doing.